Re: [PATCH OLK-5.10] mm/zram: add lz4k algorithm

30 Jun 2023

在 2023/6/30 10:39, Kefeng Wang 写道:
...
整体的代码风格都刷新下，按照linux的要求来吧
正在修改。
...
On 2023/6/30 10:41, Tu Jinjiang wrote:
...
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7H9IA
CVE: NA
-------------------------------------------
Add lz4k algorithm support for zram.
有没有原始作者信息和commit信息，要保留
我这边没有这些信息。
...
...
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: Tu Jinjiang <tujinjiang@huawei.com>
---
  crypto/Kconfig                 |   8 +
  crypto/Makefile                |   1 +
  crypto/lz4k.c                  |  97 ++++++
  drivers/block/zram/zcomp.c     |   3 +
  include/linux/lz4k.h           | 383 +++++++++++++++++++++++
  lib/Kconfig                    |   6 +
  lib/Makefile                   |   2 +
  lib/lz4k/Makefile              |   2 +
  lib/lz4k/lz4k_decode.c         | 308 +++++++++++++++++++
  lib/lz4k/lz4k_encode.c         | 539 +++++++++++++++++++++++++++++++++
  lib/lz4k/lz4k_encode_private.h | 137 +++++++++
  lib/lz4k/lz4k_private.h        | 269 ++++++++++++++++
  12 files changed, 1755 insertions(+)
  create mode 100644 crypto/lz4k.c
  create mode 100644 include/linux/lz4k.h
  create mode 100644 lib/lz4k/Makefile
  create mode 100644 lib/lz4k/lz4k_decode.c
  create mode 100644 lib/lz4k/lz4k_encode.c
  create mode 100644 lib/lz4k/lz4k_encode_private.h
  create mode 100644 lib/lz4k/lz4k_private.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 64cb304f5103..35223cff7c8a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1871,6 +1871,14 @@ config CRYPTO_LZ4HC
      help
        This is the LZ4 high compression mode algorithm.
  +config CRYPTO_LZ4K
+    tristate "LZ4K compression algorithm"
+    select CRYPTO_ALGAPI
+    select LZ4K_COMPRESS
+    select LZ4K_DECOMPRESS
+    help
+      This is the LZ4K algorithm.
+
  config CRYPTO_ZSTD
      tristate "Zstd compression algorithm"
      select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index 9d1191f2b741..5c3b0a0839c5 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -161,6 +161,7 @@ obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o 
authencesn.o
  obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
  obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
  obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
+obj-$(CONFIG_CRYPTO_LZ4K) += lz4k.o
  obj-$(CONFIG_CRYPTO_XXHASH) += xxhash_generic.o
  obj-$(CONFIG_CRYPTO_842) += 842.o
  obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/lz4k.c b/crypto/lz4k.c
new file mode 100644
index 000000000000..8daceab269ef
--- /dev/null
+++ b/crypto/lz4k.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm for ZRAM
+ * Author: Arkhipov Denis arkhipov.denis@huawei.com
+ * Create: 2020-03-25
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4k.h>
+
+
+struct lz4k_ctx {
+    void *lz4k_comp_mem;
+};
+
+static int lz4k_init(struct crypto_tfm *tfm)
+{
+    struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm);
+
+    ctx->lz4k_comp_mem = vmalloc(lz4k_encode_state_bytes_min());
+    if (!ctx->lz4k_comp_mem)
+        return -ENOMEM;
+
+    return 0;
+}
+
+static void lz4k_exit(struct crypto_tfm *tfm)
+{
+    struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm);
+    vfree(ctx->lz4k_comp_mem);
+}
+
+static int lz4k_compress_crypto(struct crypto_tfm *tfm, const u8 
*src, unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+    struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm);
+    int ret;
+
+    ret = lz4k_encode(ctx->lz4k_comp_mem, src, dst, slen, *dlen, 0);
+
去掉空行
...
+    if (ret < 0) {
+        return -EINVAL;
+    }
去掉括号
+
+    if (ret)
+        *dlen = ret;
+
+    return 0;
+}
+
+static int lz4k_decompress_crypto(struct crypto_tfm *tfm, const u8 
*src, unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+    int ret;
+
+    ret = lz4k_decode(src, dst, slen, *dlen);
+
去空行
+    if (ret <= 0)
+        return -EINVAL;
加空行
+    *dlen = ret;
+    return 0;
+}
+
+static struct crypto_alg alg_lz4k = {
+    .cra_name        = "lz4k",
+    .cra_driver_name = "lz4k-generic",
+    .cra_flags       = CRYPTO_ALG_TYPE_COMPRESS,
+    .cra_ctxsize     = sizeof(struct lz4k_ctx),
+    .cra_module      = THIS_MODULE,
+    .cra_list        = LIST_HEAD_INIT(alg_lz4k.cra_list),
+    .cra_init        = lz4k_init,
+    .cra_exit        = lz4k_exit,
+    .cra_u           = {
+        .compress = {
+            .coa_compress    = lz4k_compress_crypto,
+            .coa_decompress  = lz4k_decompress_crypto
+        }
+    }
+};
+
+static int __init lz4k_mod_init(void)
+{
+    return crypto_register_alg(&alg_lz4k);
+}
+
+static void __exit lz4k_mod_fini(void)
+{
+    crypto_unregister_alg(&alg_lz4k);
+}
+
+module_init(lz4k_mod_init);
+module_exit(lz4k_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4K Compression Algorithm");
+MODULE_ALIAS_CRYPTO("lz4k");
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index b08650417bf0..28bda2035326 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -29,6 +29,9 @@ static const char * const backends[] = {
  #if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
      "zstd",
  #endif
+#if IS_ENABLED(CONFIG_CRYPTO_LZ4K)
+    "lz4k",
+#endif
  };
    static void zcomp_strm_free(struct zcomp_strm *zstrm)
diff --git a/include/linux/lz4k.h b/include/linux/lz4k.h
new file mode 100644
index 000000000000..6e73161b1840
--- /dev/null
+++ b/include/linux/lz4k.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm
+ * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
+ * Created: 2020-03-25
+ */
+
+#ifndef _LZ4K_H
+#define _LZ4K_H
+
+/* file lz4k.h
+  This file contains the platform-independent API of LZ-class
+  lossless codecs (compressors/decompressors) with complete
+  in-place documentation.  The documentation is formatted
+  in accordance with DOXYGEN mark-up format.  So, one can
+  generate proper documentation, e.g. in HTML format, using DOXYGEN.
+
+  Currently, LZ-class codecs, documented here, implement following
+  algorithms for lossless data compression/decompression:
+  \li "LZ HUAWEI" proprietary codec competing with LZ4 - lz4k_encode(),
+  lz4k_encode_delta(), lz4k_decode(), lz4k_decode_delta()
+
+  The LZ HUAWEI compressors accept any data as input and compress it
+  without loss to a smaller size if possible.
+  Compressed data produced by LZ HUAWEI compressor API lz4k_encode*(),
+  can be decompressed only by lz4k_decode() API documented below.\n
+  */
+
+/*
+  lz4k_status defines simple set of status values returned by Huawei 
APIs
+ */
各种Huawei 都改成 lz4k之类的  算法本身不要保留 LZ HUAWEI之类的
...
+typedef enum {
+    LZ4K_STATUS_INCOMPRESSIBLE =  0, /* !< Return when data is 
incompressible */
+    LZ4K_STATUS_FAILED         = -1, /* !< Return on general failure */
+    LZ4K_STATUS_READ_ERROR =     -2, /* !< Return when data reading 
failed */
+    LZ4K_STATUS_WRITE_ERROR =    -3  /* !< Return when data writing 
failed */
+} lz4k_status;
+
+/*
+  LZ4K_Version() returns static unmutable string with algorithm version
+ */
+const char *lz4k_version(void);
+
+/*
+  lz4k_encode_state_bytes_min() returns number of bytes for state 
parameter,
+  supplied to lz4k_encode(), lz4k_encode_delta(),
+  lz4k_update_delta_state().
+  So, state should occupy at least lz4k_encode_state_bytes_min() for 
mentioned
+  functions to work correctly.
+ */
+unsigned lz4k_encode_state_bytes_min(void);
下面的注释风格之类要改下；或者删掉一些无用的
...
+
+/*
+  lz4k_encode() encodes/compresses one input buffer at *in, places
+  result of encoding into one output buffer at *out if encoded data
+  size fits specified values of out_max and out_limit.
+  It returs size of encoded data in case of success or value<=0 
otherwise.
+  The result of successful encoding is in HUAWEI proprietary format, 
that
+  is the encoded data can be decoded only by lz4k_decode().
+
+  \return
+    \li positive value\n
+      if encoding was successful. The value returned is the size of 
encoded
+      (compressed) data always <=out_max.
+    \li non-positive value\n
+      if in==0||in_max==0||out==0||out_max==0 or
+      if out_max is less than needed for encoded (compressed) data.
+    \li 0 value\n
+      if encoded data size >= out_limit
+
+  \param[in] state
+    !=0, pointer to state buffer used internally by the function.  
Size of
+    state in bytes should be at least 
lz4k_encode_state_bytes_min().  The content
+    of state buffer will be changed during encoding.
+
+  \param[in] in
+    !=0, pointer to the input buffer to encode (compress).  The 
content of
+    the input buffer does not change during encoding.
+
+  \param[in] out
+    !=0, pointer to the output buffer where to place result of encoding
+    (compression).
+    If encoding is unsuccessful, e.g. out_max or out_limit are less 
than
+    needed for encoded data then content of out buffer may be 
arbitrary.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at *in
+
+  \param[in] out_max
+    !=0, size in bytes of the output buffer at *out
+
+  \param[in] out_limit
+    encoded data size soft limit in bytes. Due to performance 
reasons it is
+    not guaranteed that
+    lz4k_encode will always detect that resulting encoded data size is
+    bigger than out_limit.
+    Hovewer, when reaching out_limit is detected, lz4k_encode() returns
+    earlier and spares CPU cycles.  Caller code should recheck result
+    returned by lz4k_encode() (value greater than 0) if it is really
+    less or equal than out_limit.
+    out_limit is ignored if it is equal to 0.
+ */
+int lz4k_encode(
+    void *const state,
+    const void *const in,
+    void *out,
+    unsigned in_max,
+    unsigned out_max,
+    unsigned out_limit);
+
+/*
+  lz4k_encode_max_cr() encodes/compresses one input buffer at *in, 
places
+  result of encoding into one output buffer at *out if encoded data
+  size fits specified value of out_max.
+  It returs size of encoded data in case of success or value<=0 
otherwise.
+  The result of successful encoding is in HUAWEI proprietary format, 
that
+  is the encoded data can be decoded only by lz4k_decode().
+
+  \return
+    \li positive value\n
+      if encoding was successful. The value returned is the size of 
encoded
+      (compressed) data always <=out_max.
+    \li non-positive value\n
+      if in==0||in_max==0||out==0||out_max==0 or
+      if out_max is less than needed for encoded (compressed) data.
+
+  \param[in] state
+    !=0, pointer to state buffer used internally by the function.  
Size of
+    state in bytes should be at least 
lz4k_encode_state_bytes_min().  The content
+    of state buffer will be changed during encoding.
+
+  \param[in] in
+    !=0, pointer to the input buffer to encode (compress).  The 
content of
+    the input buffer does not change during encoding.
+
+  \param[in] out
+    !=0, pointer to the output buffer where to place result of encoding
+    (compression).
+    If encoding is unsuccessful, e.g. out_max is less than
+    needed for encoded data then content of out buffer may be 
arbitrary.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at *in
+
+  \param[in] out_max
+    !=0, size in bytes of the output buffer at *out
+
+  \param[in] out_limit
+    encoded data size soft limit in bytes. Due to performance 
reasons it is
+    not guaranteed that
+    lz4k_encode will always detect that resulting encoded data size is
+    bigger than out_limit.
+    Hovewer, when reaching out_limit is detected, lz4k_encode() returns
+    earlier and spares CPU cycles.  Caller code should recheck result
+    returned by lz4k_encode() (value greater than 0) if it is really
+    less or equal than out_limit.
+    out_limit is ignored if it is equal to 0.
+ */
+int lz4k_encode_max_cr(
+    void *const state,
+    const void *const in,
+    void *out,
+    unsigned in_max,
+    unsigned out_max,
+    unsigned out_limit);
+
+/*
+  lz4k_update_delta_state() fills/updates state (hash table) in the 
same way as
+  lz4k_encode does while encoding (compressing).
+  The state and its content can then be used by lz4k_encode_delta()
+  to encode (compress) data more efficiently.
+  By other words, effect of lz4k_update_delta_state() is the same as
+  lz4k_encode() with all encoded output discarded.
+
+  Example sequence of calls for lz4k_update_delta_state and
+  lz4k_encode_delta:
+    //dictionary (1st) block
+    int result0=lz4k_update_delta_state(state, in0, in0, in_max0);
+//delta (2nd) block
+    int result1=lz4k_encode_delta(state, in0, in, out, in_max,
+                                       out_max);
+
+  \param[in] state
+    !=0, pointer to state buffer used internally by lz4k_encode*.
+    Size of state in bytes should be at least 
lz4k_encode_state_bytes_min().
+    The content of state buffer is zeroed at the beginning of
+    lz4k_update_delta_state ONLY when in0==in.
+    The content of state buffer will be changed inside
+    lz4k_update_delta_state.
+
+  \param[in] in0
+    !=0, pointer to the reference/dictionary input buffer that was used
+    as input to preceding call of lz4k_encode() or 
lz4k_update_delta_state()
+    to fill/update the state buffer.
+    The content of the reference/dictionary input buffer does not 
change
+    during encoding.
+    The in0 is needed for use-cases when there are several 
dictionary and
+    input blocks interleaved, e.g.
+ <dictionaryA><inputA><dictionaryB><inputB>..., or
+    <dictionaryA><dictionaryB><inputAB>..., etc.
+
+  \param[in] in
+    !=0, pointer to the input buffer to fill/update state as if 
encoding
+    (compressing) this input.  This input buffer is also called 
dictionary
+    input buffer.
+    The content of the input buffer does not change during encoding.
+    The two buffers - at in0 and at in - should be contiguous in 
memory.
+    That is, the last byte of buffer at in0 is located exactly 
before byte
+    at in.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at in.
+ */
+int lz4k_update_delta_state(
+    void *const state,
+    const void *const in0,
+    const void *const in,
+    unsigned in_max);
+
+/*
+  lz4k_encode_delta() encodes (compresses) data from one input buffer
+  using one reference buffer as dictionary and places the result of
+  compression into one output buffer.
+  The result of successful compression is in HUAWEI proprietary 
format, so
+  that compressed data can be decompressed only by lz4k_decode_delta().
+  Reference/dictionary buffer and input buffer should be contiguous in
+  memory.
+
+  Example sequence of calls for lz4k_update_delta_state and
+  lz4k_encode_delta:
+//dictionary (1st) block
+    int result0=lz4k_update_delta_state(state, in0, in0, in_max0);
+//delta (2nd) block
+    int result1=lz4k_encode_delta(state, in0, in, out, in_max,
+                                       out_max);
+
+  Example sequence of calls for lz4k_encode and lz4k_encode_delta:
+//dictionary (1st) block
+    int result0=lz4k_encode(state, in0, out0, in_max0, out_max0);
+//delta (2nd) block
+    int result1=lz4k_encode_delta(state, in0, in, out, in_max,
+                                       out_max);
+
+  \return
+    \li positive value\n
+      if encoding was successful. The value returned is the size of 
encoded
+      (compressed) data.
+    \li non-positive value\n
+      if state==0||in0==0||in==0||in_max==0||out==0||out_max==0 or
+      if out_max is less than needed for encoded (compressed) data.
+
+  \param[in] state
+    !=0, pointer to state buffer used internally by the function.  
Size of
+    state in bytes should be at least 
lz4k_encode_state_bytes_min().  For more
+    efficient encoding the state buffer may be filled/updated by 
calling
+    lz4k_update_delta_state() or lz4k_encode() before 
lz4k_encode_delta().
+    The content of state buffer is zeroed at the beginning of
+    lz4k_encode_delta() ONLY when in0==in.
+    The content of state will be changed during encoding.
+
+  \param[in] in0
+    !=0, pointer to the reference/dictionary input buffer that was 
used as
+    input to preceding call of lz4k_encode() or 
lz4k_update_delta_state() to
+    fill/update the state buffer.
+    The content of the reference/dictionary input buffer does not 
change
+    during encoding.
+
+  \param[in] in
+    !=0, pointer to the input buffer to encode (compress).  The 
input buffer
+    is compressed using content of the reference/dictionary input 
buffer at
+    in0. The content of the input buffer does not change during 
encoding.
+    The two buffers - at *in0 and at *in - should be contiguous in 
memory.
+    That is, the last byte of buffer at *in0 is located exactly 
before byte
+    at *in.
+
+  \param[in] out
+    !=0, pointer to the output buffer where to place result of encoding
+    (compression). If compression is unsuccessful then content of out
+    buffer may be arbitrary.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at *in
+
+  \param[in] out_max
+    !=0, size in bytes of the output buffer at *out.
+ */
+int lz4k_encode_delta(
+    void *const state,
+    const void *const in0,
+    const void *const in,
+    void *out,
+    unsigned in_max,
+    unsigned out_max);
+
+/*
+  lz4k_decode() decodes (decompresses) data from one input buffer 
and places
+  the result of decompression into one output buffer.  The encoded 
data in input
+  buffer should be in HUAWEI proprietary format, produced by 
lz4k_encode()
+  or by lz4k_encode_delta().
+
+  \return
+    \li positive value\n
+      if decoding was successful. The value returned is the size of 
decoded
+      (decompressed) data.
+    \li non-positive value\n
+      if in==0||in_max==0||out==0||out_max==0 or
+      if out_max is less than needed for decoded (decompressed) data or
+      if input encoded data format is corrupted.
+
+  \param[in] in
+    !=0, pointer to the input buffer to decode (decompress). The 
content of
+    the input buffer does not change during decoding.
+
+  \param[in] out
+    !=0, pointer to the output buffer where to place result of decoding
+    (decompression). If decompression is unsuccessful then content 
of out
+    buffer may be arbitrary.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at in
+
+  \param[in] out_max
+    !=0, size in bytes of the output buffer at out
+ */
+int lz4k_decode(
+    const void *const in,
+    void *const out,
+    unsigned in_max,
+    unsigned out_max);
+
+/*
+  lz4k_decode_delta() decodes (decompresses) data from one input buffer
+  and places the result of decompression into one output buffer.  The
+  compressed data in input buffer should be in format, produced by
+  lz4k_encode_delta().
+
+  Example sequence of calls for lz4k_decode and lz4k_decode_delta:
+//dictionary (1st) block
+    int result0=lz4k_decode(in0, out0, in_max0, out_max0);
+//delta (2nd) block
+    int result1=lz4k_decode_delta(in, out0, out, in_max, out_max);
+
+  \return
+    \li positive value\n
+      if decoding was successful. The value returned is the size of 
decoded
+      (decompressed) data.
+    \li non-positive value\n
+      if in==0||in_max==0||out==0||out_max==0 or
+      if out_max is less than needed for decoded (decompressed) data or
+      if input data format is corrupted.
+
+  \param[in] in
+    !=0, pointer to the input buffer to decode (decompress). The 
content of
+    the input buffer does not change during decoding.
+
+  \param[in] out0
+    !=0, pointer to the dictionary input buffer that was used as 
input to
+    lz4k_update_delta_state() to fill/update the state buffer. The 
content
+    of the dictionary input buffer does not change during decoding.
+
+  \param[in] out
+    !=0, pointer to the output buffer where to place result of decoding
+    (decompression). If decompression is unsuccessful then content 
of out
+    buffer may be arbitrary.
+    The two buffers - at *out0 and at *out - should be contiguous in 
memory.
+    That is, the last byte of buffer at *out0 is located exactly 
before byte
+    at *out.
+
+  \param[in] in_max
+    !=0, size in bytes of the input buffer at *in
+
+  \param[in] out_max
+    !=0, size in bytes of the output buffer at *out
+ */
+int lz4k_decode_delta(
+    const void *in,
+    const void *const out0,
+    void *const out,
+    unsigned in_max,
+    unsigned out_max);
+
+
+#endif /* _LZ4K_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 36326864249d..4bf1c2c21157 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -310,6 +310,12 @@ config LZ4HC_COMPRESS
  config LZ4_DECOMPRESS
      tristate
  +config LZ4K_COMPRESS
+    tristate
+
+config LZ4K_DECOMPRESS
+    tristate
+
  config ZSTD_COMPRESS
      select XXHASH
      tristate
diff --git a/lib/Makefile b/lib/Makefile
index a803e1527c4b..bd0d3635ae46 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -187,6 +187,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
  obj-$(CONFIG_LZ4_COMPRESS) += lz4/
  obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
  obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
+obj-$(CONFIG_LZ4K_COMPRESS) += lz4k/
+obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k/
  obj-$(CONFIG_ZSTD_COMPRESS) += zstd/
  obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/
  obj-$(CONFIG_XZ_DEC) += xz/
diff --git a/lib/lz4k/Makefile b/lib/lz4k/Makefile
new file mode 100644
index 000000000000..6ea3578639d4
--- /dev/null
+++ b/lib/lz4k/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_LZ4K_COMPRESS) += lz4k_encode.o
+obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k_decode.o
\ No newline at end of file
diff --git a/lib/lz4k/lz4k_decode.c b/lib/lz4k/lz4k_decode.c
new file mode 100644
index 000000000000..567b76b7bc51
--- /dev/null
+++ b/lib/lz4k/lz4k_decode.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm
+ * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
+ * Created: 2020-03-25
+ */
+
+#if !defined(__KERNEL__)
+#include "lz4k.h"
+#else
+#include <linux/lz4k.h>
+#include <linux/module.h>
+#endif
+
+#include "lz4k_private.h" /* types, etc */
+
+static const uint8_t *get_size(
+    uint_fast32_t *size,
+    const uint8_t *in_at,
+    const uint8_t *const in_end)
+{
+    uint_fast32_t u;
+    do {
+        if (unlikely(in_at >= in_end))
+            return NULL;
+        *size += (u = *(const uint8_t*)in_at);
+        ++in_at;
+    } while (BYTE_MAX == u);
+    return in_at;
+}
+
+static int end_of_block(
+    const uint_fast32_t nr_bytes_max,
+    const uint_fast32_t r_bytes_max,
+    const uint8_t *const in_at,
+    const uint8_t *const in_end,
+    const uint8_t *const out,
+    const uint8_t *const out_at)
+{
+    if (!nr_bytes_max)
+        return LZ4K_STATUS_FAILED; /* should be the last one in 
block */
+    if (r_bytes_max != REPEAT_MIN)
+        return LZ4K_STATUS_FAILED; /* should be the last one in 
block */
+    if (in_at != in_end)
+        return LZ4K_STATUS_FAILED; /* should be the last one in 
block */
+    return (int)(out_at - out);
+}
+
+enum {
+    NR_COPY_MIN = 16,
+    R_COPY_MIN = 16,
+    R_COPY_SAFE = R_COPY_MIN - 1,
+    R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1
+};
+
+static bool out_non_repeat(
+    const uint8_t **in_at,
+    uint8_t **out_at,
+    uint_fast32_t nr_bytes_max,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    const uint8_t *const in_copy_end = *in_at + nr_bytes_max;
+    uint8_t *const out_copy_end = *out_at + nr_bytes_max;
+    if (likely(nr_bytes_max <= NR_COPY_MIN)) {
+        if (likely(*in_at <= in_end - NR_COPY_MIN &&
+               *out_at <= out_end - NR_COPY_MIN))
+            m_copy(*out_at, *in_at, NR_COPY_MIN);
+        else if (in_copy_end <= in_end && out_copy_end <= out_end)
+            m_copy(*out_at, *in_at, nr_bytes_max);
+        else
+            return false;
+    } else {
+        if (likely(in_copy_end <= in_end - NR_COPY_MIN &&
+               out_copy_end <= out_end - NR_COPY_MIN)) {
+            m_copy(*out_at, *in_at, NR_COPY_MIN);
+            copy_x_while_lt(*out_at + NR_COPY_MIN,
+                    *in_at + NR_COPY_MIN,
+                    out_copy_end, NR_COPY_MIN);
+        } else if (in_copy_end <= in_end && out_copy_end <= out_end) {
+            m_copy(*out_at, *in_at, nr_bytes_max);
+        } else { /* in_copy_end > in_end || out_copy_end > out_end */
+            return false;
+        }
+    }
+    *in_at = in_copy_end;
+    *out_at = out_copy_end;
+    return true;
+}
+
+static void out_repeat_overlap(
+    uint_fast32_t offset,
+    uint8_t *out_at,
+    const uint8_t *out_from,
+    const uint8_t *const out_copy_end)
+{ /* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X  
<= out_end */
+    enum {
+        COPY_MIN = R_COPY_MIN >> 1,
+        OFFSET_LIMIT = COPY_MIN >> 1
+    };
+    m_copy(out_at, out_from, COPY_MIN);
+    out_at += offset;
+    if (offset <= OFFSET_LIMIT)
+        offset <<= 1;
+    do {
+        m_copy(out_at, out_from, COPY_MIN);
+        out_at += offset;
+        if (offset <= OFFSET_LIMIT)
+            offset <<= 1;
+    } while (out_at - out_from < R_COPY_MIN);
+    while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN);
+}
+
+static bool out_repeat_slow(
+    uint_fast32_t r_bytes_max,
+    uint_fast32_t offset,
+    uint8_t *out_at,
+    const uint8_t *out_from,
+    const uint8_t *const out_copy_end,
+    const uint8_t *const out_end)
+{
+    if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) {
+        out_repeat_overlap(offset, out_at, out_from, out_copy_end);
+    } else {
+        if (unlikely(out_copy_end > out_end))
+            return false;
+        if (offset == 1) {
+            m_set(out_at, *out_from, r_bytes_max);
+        } else {
+            do
+                *out_at++ = *out_from++;
+            while (out_at < out_copy_end);
+        }
+    }
+    return true;
+}
+
+static int decode(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2)
+{
+    const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
+    const uint8_t *in_at = in;
+    const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX;
+    uint8_t *out_at = out;
+    while (likely(in_at <= in_end_minus_x)) {
+        const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS;
+        const uint_fast32_t offset = utag & mask(off_log2);
+        uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2),
+                  r_bytes_max = ((utag >> off_log2) & mask(r_log2)) +
+                        REPEAT_MIN;
+        const uint8_t *out_from = 0;
+        uint8_t *out_copy_end = 0;
+        in_at += TAG_BYTES_MAX;
+        if (unlikely(nr_bytes_max == mask(nr_log2))) {
+            in_at = get_size(&nr_bytes_max, in_at, in_end);
+            if (in_at == NULL)
+                return LZ4K_STATUS_READ_ERROR;
+        }
+        if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, 
out_end))
+            return LZ4K_STATUS_FAILED;
+        if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) {
+            in_at = get_size(&r_bytes_max, in_at, in_end);
+            if (in_at == NULL)
+                return LZ4K_STATUS_READ_ERROR;
+        }
+        out_from = out_at - offset;
+        if (unlikely(out_from < out0))
+            return LZ4K_STATUS_FAILED;
+        out_copy_end = out_at + r_bytes_max;
+        if (likely(offset >= R_COPY_MIN &&
+               out_copy_end <= out_end - R_COPY_SAFE_2X)) {
+            copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end,
+                           R_COPY_MIN);
+        } else if (likely(offset >= (R_COPY_MIN >> 1) &&
+                  out_copy_end <= out_end - R_COPY_SAFE_2X)) {
+            m_copy(out_at, out_from, R_COPY_MIN);
+            out_at += offset;
+            while_lt_copy_x(out_at, out_from, out_copy_end, 
R_COPY_MIN);
+            /* faster than 2x */
+        } else if (likely(offset > 0)) {
+            if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from,
+                 out_copy_end, out_end))
+                return LZ4K_STATUS_FAILED;
+        } else { /* offset == 0: EOB, last literal */
+            return end_of_block(nr_bytes_max, r_bytes_max, in_at,
+                        in_end, out, out_at);
+        }
+        out_at = out_copy_end;
+    }
+    return in_at == in_end ? (int)(out_at - out) : LZ4K_STATUS_FAILED;
+}
+
+static int decode4kb(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    enum {
+        NR_LOG2 = 6
+    };
+    return decode(in, out0, out, in_end, out_end, NR_LOG2, 
BLOCK_4KB_LOG2);
+}
+
+static int decode8kb(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    enum {
+        NR_LOG2 = 5
+    };
+    return decode(in, out0, out, in_end, out_end, NR_LOG2, 
BLOCK_8KB_LOG2);
+}
+
+static int decode16kb(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    enum {
+        NR_LOG2 = 5
+    };
+    return decode(in, out0, out, in_end, out_end, NR_LOG2, 
BLOCK_16KB_LOG2);
+}
+
+static int decode32kb(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    enum {
+        NR_LOG2 = 4
+    };
+    return decode(in, out0, out, in_end, out_end, NR_LOG2, 
BLOCK_32KB_LOG2);
+}
+
+static int decode64kb(
+    const uint8_t *const in,
+    const uint8_t *const out0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    const uint8_t *const out_end)
+{
+    enum {
+        NR_LOG2 = 4
+    };
+    return decode(in, out0, out, in_end, out_end, NR_LOG2, 
BLOCK_64KB_LOG2);
+}
+
+static inline const void *u8_inc(const uint8_t *a)
+{
+    return a+1;
+}
+
+int lz4k_decode(
+    const void *in,
+    void *const out,
+    unsigned in_max,
+    unsigned out_max)
+{
+    /* ++use volatile pointers to prevent compiler optimizations */
+    const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
+    const uint8_t *volatile out_end = (uint8_t*)out + out_max;
+    uint8_t in_log2 = 0;
+    if (unlikely(in == NULL || out == NULL || in_max <= 4 || out_max 
<= 0))
+        return LZ4K_STATUS_FAILED;
+    in_log2 = (uint8_t)(BLOCK_4KB_LOG2 + *(const uint8_t*)in);
+    /* invalid buffer size or pointer overflow */
+    if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= 
out_end))
+        return LZ4K_STATUS_FAILED;
+    /* -- */
+    in = u8_inc((const uint8_t*)in);
+    --in_max;
+    if (in_log2 < BLOCK_8KB_LOG2)
+        return decode4kb((const uint8_t*)in, (uint8_t*)out,
+                 (uint8_t*)out, in_end, out_end);
+    if (in_log2 == BLOCK_8KB_LOG2)
+        return decode8kb((const uint8_t*)in, (uint8_t*)out,
+                 (uint8_t*)out, in_end, out_end);
+    if (in_log2 == BLOCK_16KB_LOG2)
+        return decode16kb((const uint8_t*)in, (uint8_t*)out,
+                  (uint8_t*)out, in_end, out_end);
+    if (in_log2 == BLOCK_32KB_LOG2)
+        return decode32kb((const uint8_t*)in, (uint8_t*)out,
+                  (uint8_t*)out, in_end, out_end);
+    if (in_log2 == BLOCK_64KB_LOG2)
+        return decode64kb((const uint8_t*)in, (uint8_t*)out,
+                  (uint8_t*)out, in_end, out_end);
+    return LZ4K_STATUS_FAILED;
+}
+EXPORT_SYMBOL(lz4k_decode);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("LZ4K decoder");
diff --git a/lib/lz4k/lz4k_encode.c b/lib/lz4k/lz4k_encode.c
new file mode 100644
index 000000000000..a425d3a0b827
--- /dev/null
+++ b/lib/lz4k/lz4k_encode.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm
+ * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
+ * Created: 2020-03-25
+ */
+
+#if !defined(__KERNEL__)
+#include "lz4k.h"
+#else
+#include <linux/lz4k.h>
+#include <linux/module.h>
+#endif
+
+#include "lz4k_private.h"
+#include "lz4k_encode_private.h"
+
+static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u)
+{
+    for (; unlikely(u >= BYTE_MAX); u -= BYTE_MAX)
+        *out_at++ = (uint8_t)BYTE_MAX;
+    *out_at++ = (uint8_t)u;
+    return out_at;
+}
+
+static inline uint8_t *out_utag_then_bytes_left(
+    uint8_t *out_at,
+    uint_fast32_t utag,
+    uint_fast32_t bytes_left)
+{
+    m_copy(out_at, &utag, TAG_BYTES_MAX);
+    return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left);
+}
+
+static int out_tail(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    const uint8_t *const out,
+    const uint8_t *const nr0,
+    const uint8_t *const in_end,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out)
+{
+    const uint_fast32_t nr_mask = mask(nr_log2);
+    const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
+    const uint_fast32_t nr_bytes_max = u_32(in_end - nr0);
+    if (encoded_bytes_min(nr_log2, nr_bytes_max) > u_32(out_end - 
out_at))
+        return check_out ? LZ4K_STATUS_WRITE_ERROR :
+                   LZ4K_STATUS_INCOMPRESSIBLE;
+    if (nr_bytes_max < nr_mask) {
+        /* caller guarantees at least one nr-byte */
+        uint_fast32_t utag = (nr_bytes_max << (off_log2 + r_log2));
+        m_copy(out_at, &utag, TAG_BYTES_MAX);
+        out_at += TAG_BYTES_MAX;
+    } else {
+        uint_fast32_t bytes_left = nr_bytes_max - nr_mask;
+        uint_fast32_t utag = (nr_mask << (off_log2 + r_log2));
+        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
+    }
+    m_copy(out_at, nr0, nr_bytes_max);
+    return (int)(out_at + nr_bytes_max - out);
+}
+
+int lz4k_out_tail(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    const uint8_t *const out,
+    const uint8_t *const nr0,
+    const uint8_t *const in_end,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out)
+{
+    return out_tail(out_at, out_end, out, nr0, in_end,
+            nr_log2, off_log2, check_out);
+}
+
+static uint8_t *out_non_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    const uint8_t *const nr0,
+    const uint8_t *const r,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out)
+{
+    const uint_fast32_t nr_bytes_max = u_32(r - nr0);
+    const uint_fast32_t nr_mask = mask(nr_log2),
+        r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
+    if (likely(nr_bytes_max < nr_mask)) {
+        if (unlikely(check_out &&
+            TAG_BYTES_MAX + nr_bytes_max > u_32(out_end - out_at)))
+            return NULL;
+        utag |= (nr_bytes_max << (off_log2 + r_log2));
+        m_copy(out_at, &utag, TAG_BYTES_MAX);
+        out_at += TAG_BYTES_MAX;
+    } else {
+        uint_fast32_t bytes_left = nr_bytes_max - nr_mask;
+        if (unlikely(check_out &&
+            TAG_BYTES_MAX + size_bytes_count(bytes_left) + 
nr_bytes_max >
+            u_32(out_end - out_at)))
+            return NULL;
+        utag |= (nr_mask << (off_log2 + r_log2));
+        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
+    }
+    if (unlikely(check_out))
+        m_copy(out_at, nr0, nr_bytes_max);
+    else
+        copy_x_while_total(out_at, nr0, nr_bytes_max, NR_COPY_MIN);
+    out_at += nr_bytes_max;
+    return out_at;
+}
+
+uint8_t *lz4k_out_non_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    const uint8_t *const nr0,
+    const uint8_t *const r,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out)
+{
+     return out_non_repeat(out_at, out_end, utag, nr0, r,
+                nr_log2, off_log2, check_out);
+}
+
+static uint8_t *out_r_bytes_left(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out) /* =false when
+out_max>=encoded_bytes_max(in_max), =true otherwise */
+{
+    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + 
nr_log2));
+    if (unlikely(r_bytes_max - REPEAT_MIN >= r_mask)) {
+        uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask;
+        if (unlikely(check_out &&
+            size_bytes_count(bytes_left) > u_32(out_end - out_at)))
+            return NULL;
+        out_at = out_size_bytes(out_at, bytes_left);
+    }
+    return out_at; /* SUCCESS: continue compression */
+}
+
+uint8_t *lz4k_out_r_bytes_left(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out)
+{
+    return out_r_bytes_left(out_at, out_end, r_bytes_max,
+                nr_log2, off_log2, check_out);
+}
+
+static uint8_t *out_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out) /* =false when
+out_max>=encoded_bytes_max(in_max), =true otherwise */
+{
+    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + 
nr_log2));
+    if (likely(r_bytes_max - REPEAT_MIN < r_mask)) {
+        if (unlikely(check_out && TAG_BYTES_MAX > u_32(out_end - 
out_at)))
+            return NULL;
+        utag |= ((r_bytes_max - REPEAT_MIN) << off_log2);
+        m_copy(out_at, &utag, TAG_BYTES_MAX);
+        out_at += TAG_BYTES_MAX;
+    } else {
+        uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask;
+        if (unlikely(check_out &&
+            TAG_BYTES_MAX + size_bytes_count(bytes_left) >
+            u_32(out_end - out_at)))
+            return NULL;
+        utag |= (r_mask << off_log2);
+        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
+    }
+    return out_at; /* SUCCESS: continue compression */
+}
+
+uint8_t *lz4k_out_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out)
+{
+    return out_repeat(out_at, out_end, utag, r_bytes_max,
+              nr_log2, off_log2, check_out);
+}
+
+static const uint8_t *repeat_end(
+    const uint8_t *q,
+    const uint8_t *r,
+    const uint8_t *const in_end_safe,
+    const uint8_t *const in_end)
+{
+    q += REPEAT_MIN;
+    r += REPEAT_MIN;
+    /* caller guarantees r+12<=in_end */
+    do {
+        const uint64_t x = read8_at(q) ^ read8_at(r);
+        if (x) {
+            const uint16_t ctz = (uint16_t)__builtin_ctzl(x);
+            return r + (ctz >> BYTE_BITS_LOG2);
+        }
+        /* some bytes differ:+ count of trailing 0-bits/bytes */
+        q += sizeof(uint64_t), r += sizeof(uint64_t);
+    } while (likely(r <= in_end_safe)); /* once, at input block end */
+    do {
+        if (*q != *r) return r;
+        ++q;
+        ++r;
+    } while (r < in_end);
+    return r;
+}
+
+const uint8_t *lz4k_repeat_end(
+    const uint8_t *q,
+    const uint8_t *r,
+    const uint8_t *const in_end_safe,
+    const uint8_t *const in_end)
+{
+    return repeat_end(q, r, in_end_safe, in_end);
+}
+
+enum {
+    HT_BYTES_LOG2 = HT_LOG2 + 1
+};
+
+inline unsigned encode_state_bytes_min(void)
+{
+    unsigned bytes_total = (1U << HT_BYTES_LOG2);
+    return bytes_total;
+}
+
+#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR)
+
+unsigned lz4k_encode_state_bytes_min(void)
+{
+    return encode_state_bytes_min();
+}
+EXPORT_SYMBOL(lz4k_encode_state_bytes_min);
+
+#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */
+
+/* CR increase order: +STEP, have OFFSETS, use _5b */
+/* *_6b to compete with LZ4 */
+static inline uint_fast32_t hash0_v(const uint64_t r, uint32_t shift)
+{
+    return hash64v_6b(r, shift);
+}
+
+static inline uint_fast32_t hash0(const uint8_t *r, uint32_t shift)
+{
+    return hash64_6b(r, shift);
+}
+
+/*
+ * Proof that 'r' increments are safe-NO pointer overflows are 
possible:
+ *
+ * While using STEP_LOG2=5, step_start=1<<STEP_LOG2 == 32 we 
increment s
+ * 32 times by 1, 32 times by 2, 32 times by 3, and so on:
+ * 32*1+32*2+32*3+...+32*31 == 32*SUM(1..31) == 32*((1+31)*15+16).
+ * So, we can safely increment s by at most 31 for input block size <=
+ * 1<<13 < 15872.
+ *
+ * More precisely, STEP_LIMIT == x for any input block calculated as 
follows:
+ * 1<<off_log2 >= (1<<STEP_LOG2)*((x+1)(x-1)/2+x/2) ==>
+ * 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==>
+ * x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard
+ * method.
+ * To avoid overhead here conservative approximate value of x is 
calculated
+ * as average of two nearest square roots, see STEP_LIMIT above.
+ */
+
+enum {
+    STEP_LOG2 = 5 /* increase for better CR */
+};
+
+static int encode_any(
+    uint16_t *const ht0,
+    const uint8_t *const in0,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    uint8_t *const out_end, /* ==out_limit for !check_out */
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out)
+{   /* caller guarantees off_log2 <=16 */
+    uint8_t *out_at = out;
+    const uint8_t *const in_end_safe = in_end - NR_COPY_MIN;
+    const uint8_t *r = in0;
+    const uint8_t *nr0 = r++;
+    uint_fast32_t step = 1 << STEP_LOG2;
+    for (;;) {
+        uint_fast32_t utag = 0;
+        const uint8_t *r_end = 0;
+        uint_fast32_t r_bytes_max = 0;
+        const uint8_t *const q = hashed(in0, ht0, hash0(r, HT_LOG2), 
r);
+        if (!equal4(q, r)) {
+            r += (++step >> STEP_LOG2);
+            if (unlikely(r > in_end_safe))
+                return out_tail(out_at, out_end, out, nr0, in_end,
+                        nr_log2, off_log2, check_out);
+            continue;
+        }
+        utag = u_32(r - q);
+        r_end = repeat_end(q, r, in_end_safe, in_end);
+        r = repeat_start(q, r, nr0, in0);
+        r_bytes_max = u_32(r_end - r);
+        if (nr0 == r) {
+            out_at = out_repeat(out_at, out_end, utag, r_bytes_max,
+                        nr_log2, off_log2, check_out);
+        } else {
+            update_utag(r_bytes_max, &utag, nr_log2, off_log2);
+            out_at = out_non_repeat(out_at, out_end, utag, nr0, r,
+                        nr_log2, off_log2, check_out);
+            if (unlikely(check_out && out_at == NULL))
+                return LZ4K_STATUS_WRITE_ERROR;
+            out_at = out_r_bytes_left(out_at, out_end, r_bytes_max,
+                          nr_log2, off_log2, check_out);
+        }
+        if (unlikely(check_out && out_at == NULL))
+            return LZ4K_STATUS_WRITE_ERROR;
+        nr0 = (r += r_bytes_max);
+        if (unlikely(r > in_end_safe))
+            return r == in_end ? (int)(out_at - out) :
+                out_tail(out_at, out_end, out, r, in_end,
+                     nr_log2, off_log2, check_out);
+        ht0[hash0(r - 1 - 1, HT_LOG2)] = (uint16_t)(r - 1 - 1 - in0);
+        step = 1 << STEP_LOG2;
+    }
+}
+
+static int encode_fast(
+    uint16_t *const ht,
+    const uint8_t *const in,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    uint8_t *const out_end, /* ==out_limit for !check_out */
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2)
+{ /* caller guarantees off_log2 <=16 */
+    return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2,
+        false); /* !check_out */
+}
+
+static int encode_slow(
+    uint16_t *const ht,
+    const uint8_t *const in,
+    uint8_t *const out,
+    const uint8_t *const in_end,
+    uint8_t *const out_end, /* ==out_limit for !check_out */
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2)
+{ /* caller guarantees off_log2 <=16 */
+    return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2,
+        true); /* check_out */
+}
+
+static int encode4kb(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    const uint_fast32_t in_max,
+    const uint_fast32_t out_max,
+    const uint_fast32_t out_limit)
+{
+    enum {
+        NR_LOG2 = 6
+    };
+    const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ?
+        encode_slow(state, in, out, in + in_max, out + out_max,
+            NR_LOG2, BLOCK_4KB_LOG2) :
+        encode_fast(state, in, out, in + in_max, out + out_limit,
+            NR_LOG2, BLOCK_4KB_LOG2);
+    return result <= 0 ? result : result + 1; /* +1 for in_log2 */
+}
+
+static int encode8kb(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    const uint_fast32_t in_max,
+    const uint_fast32_t out_max,
+    const uint_fast32_t out_limit)
+{
+    enum {
+        NR_LOG2 = 5
+    };
+    const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ?
+        encode_slow(state, in, out, in + in_max, out + out_max,
+            NR_LOG2, BLOCK_8KB_LOG2) :
+        encode_fast(state, in, out, in + in_max, out + out_limit,
+            NR_LOG2, BLOCK_8KB_LOG2);
+    return result <= 0 ? result : result + 1; /* +1 for in_log2 */
+}
+
+static int encode16kb(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    const uint_fast32_t in_max,
+    const uint_fast32_t out_max,
+    const uint_fast32_t out_limit)
+{
+    enum {
+        NR_LOG2 = 5
+    };
+    const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ?
+        encode_slow(state, in, out, in + in_max, out + out_max,
+            NR_LOG2, BLOCK_16KB_LOG2) :
+        encode_fast(state, in, out, in + in_max, out + out_limit,
+            NR_LOG2, BLOCK_16KB_LOG2);
+    return result <= 0 ? result : result + 1; /* +1 for in_log2 */
+}
+
+static int encode32kb(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    const uint_fast32_t in_max,
+    const uint_fast32_t out_max,
+    const uint_fast32_t out_limit)
+{
+    enum {
+        NR_LOG2 = 4
+    };
+    const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ?
+        encode_slow(state, in, out, in + in_max, out + out_max,
+            NR_LOG2, BLOCK_32KB_LOG2) :
+        encode_fast(state, in, out, in + in_max, out + out_limit,
+            NR_LOG2, BLOCK_32KB_LOG2);
+    return result <= 0 ? result : result + 1; /* +1 for in_log2 */
+}
+
+static int encode64kb(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    const uint_fast32_t in_max,
+    const uint_fast32_t out_max,
+    const uint_fast32_t out_limit)
+{
+    enum {
+        NR_LOG2 = 4
+    };
+    const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ?
+        encode_slow(state, in, out, in + in_max, out + out_max,
+            NR_LOG2, BLOCK_64KB_LOG2) :
+        encode_fast(state, in, out, in + in_max, out + out_limit,
+            NR_LOG2, BLOCK_64KB_LOG2);
+    return result <= 0 ? result : result + 1; /* +1 for in_log2 */
+}
+
+static int encode(
+    uint16_t *const state,
+    const uint8_t *const in,
+    uint8_t *out,
+    uint_fast32_t in_max,
+    uint_fast32_t out_max,
+    uint_fast32_t out_limit)
+{
+    const uint8_t in_log2 = (uint8_t)(most_significant_bit_of(
+        round_up_to_power_of2(in_max - REPEAT_MIN)));
+    m_set(state, 0, encode_state_bytes_min());
+    *out = in_log2 > BLOCK_4KB_LOG2 ? (uint8_t)(in_log2 - 
BLOCK_4KB_LOG2) : 0;
+    ++out;
+    --out_max;
+    --out_limit;
+    if (in_log2 < BLOCK_8KB_LOG2)
+        return encode4kb(state, in, out, in_max, out_max, out_limit);
+    if (in_log2 == BLOCK_8KB_LOG2)
+        return encode8kb(state, in, out, in_max, out_max, out_limit);
+    if (in_log2 == BLOCK_16KB_LOG2)
+        return encode16kb(state, in, out, in_max, out_max, out_limit);
+    if (in_log2 == BLOCK_32KB_LOG2)
+        return encode32kb(state, in, out, in_max, out_max, out_limit);
+    if (in_log2 == BLOCK_64KB_LOG2)
+        return encode64kb(state, in, out, in_max, out_max, out_limit);
+    return LZ4K_STATUS_FAILED;
+}
+
+int lz4k_encode(
+    void *const state,
+    const void *const in,
+    void *out,
+    unsigned in_max,
+    unsigned out_max,
+    unsigned out_limit)
+{
+    const unsigned gain_max = 64 > (in_max >> 6) ? 64 : (in_max >> 6);
+    const unsigned out_limit_min = in_max < out_max ? in_max : out_max;
+    const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
+    const uint8_t *volatile out_end = (uint8_t*)out + out_max;
+    const void *volatile state_end =
+        (uint8_t*)state + encode_state_bytes_min();
+    if (unlikely(state == NULL))
+        return LZ4K_STATUS_FAILED;
+    if (unlikely(in == NULL || out == NULL))
+        return LZ4K_STATUS_FAILED;
+    if (unlikely(in_max <= gain_max))
+        return LZ4K_STATUS_INCOMPRESSIBLE;
+    if (unlikely(out_max <= gain_max)) /* need 1 byte for in_log2 */
+        return LZ4K_STATUS_FAILED;
+    /* ++use volatile pointers to prevent compiler optimizations */
+    if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= 
out_end))
+        return LZ4K_STATUS_FAILED;
+    if (unlikely(state >= state_end))
+        return LZ4K_STATUS_FAILED; /* pointer overflow */
+    if (!out_limit || out_limit >= out_limit_min)
+        out_limit = out_limit_min - gain_max;
+    return encode((uint16_t*)state, (const uint8_t*)in, (uint8_t*)out,
+              in_max, out_max, out_limit);
+}
+EXPORT_SYMBOL(lz4k_encode);
+
+const char *lz4k_version(void)
+{
+    static const char *version = "2020.07.07";
+    return version;
+}
+EXPORT_SYMBOL(lz4k_version);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("LZ4K encoder");
diff --git a/lib/lz4k/lz4k_encode_private.h 
b/lib/lz4k/lz4k_encode_private.h
new file mode 100644
index 000000000000..eb5cd162468f
--- /dev/null
+++ b/lib/lz4k/lz4k_encode_private.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm
+ * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
+ * Created: 2020-03-25
+ */
+
+#ifndef _LZ4K_ENCODE_PRIVATE_H
+#define _LZ4K_ENCODE_PRIVATE_H
+
+#include "lz4k_private.h"
+
+/* <nrSize bytes for whole block>+<1 terminating 0 byte> */
+static inline uint_fast32_t size_bytes_count(uint_fast32_t u)
+{
+    return (u + BYTE_MAX - 1) / BYTE_MAX;
+}
+
+/* minimum encoded size for non-compressible data */
+static inline uint_fast32_t encoded_bytes_min(
+    uint_fast32_t nr_log2,
+    uint_fast32_t in_max)
+{
+    return in_max < mask(nr_log2) ?
+        TAG_BYTES_MAX + in_max :
+        TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + 
in_max;
+}
+
+enum {
+    NR_COPY_LOG2 = 4,
+    NR_COPY_MIN = 1 << NR_COPY_LOG2
+};
+
+static inline uint_fast32_t u_32(int64_t i)
+{
+    return (uint_fast32_t)i;
+}
+
+/* maximum encoded size for non-comprressible data if "fast" encoder 
is used */
+static inline uint_fast32_t encoded_bytes_max(
+    uint_fast32_t nr_log2,
+    uint_fast32_t in_max)
+{
+    uint_fast32_t r = TAG_BYTES_MAX + 
(uint32_t)round_up_to_log2(in_max, NR_COPY_LOG2);
+    return in_max < mask(nr_log2) ? r : r + size_bytes_count(in_max 
- mask(nr_log2));
+}
+
+enum {
+    HT_LOG2 = 12
+};
+
+/*
+ * Compressed data format (where {} means 0 or more occurrences, [] 
means
+ * optional):
+ * <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)>
+ * {<nrSize byte>}[<nr bytes>]{<rSize byte>}
+ * <rSize byte> and <nrSize byte> bytes are terminated by byte != 255
+ *
+ */
+
+static inline void  update_utag(
+    uint_fast32_t r_bytes_max,
+    uint_fast32_t *utag,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2)
+{
+    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + 
nr_log2));
+    *utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ?
+         ((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << 
off_log2);
+}
+
+static inline const uint8_t *hashed(
+    const uint8_t *const in0,
+    uint16_t *const ht,
+    uint_fast32_t h,
+    const uint8_t *r)
+{
+    const uint8_t *q = in0 + ht[h];
+    ht[h] = (uint16_t)(r - in0);
+    return q;
+}
+
+static inline const uint8_t *repeat_start(
+    const uint8_t *q,
+    const uint8_t *r,
+    const uint8_t *const nr0,
+    const uint8_t *const in0)
+{
+    for (; r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1]); 
--q, --r);
+    return r;
+}
+
+int lz4k_out_tail(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    const uint8_t *const out,
+    const uint8_t *const nr0,
+    const uint8_t *const in_end,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out);
+
+uint8_t *lz4k_out_non_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    const uint8_t *const nr0,
+    const uint8_t *const r,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    bool check_out);
+
+uint8_t *lz4k_out_r_bytes_left(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out);
+
+uint8_t *lz4k_out_repeat(
+    uint8_t *out_at,
+    uint8_t *const out_end,
+    uint_fast32_t utag,
+    uint_fast32_t r_bytes_max,
+    const uint_fast32_t nr_log2,
+    const uint_fast32_t off_log2,
+    const bool check_out);
+
+const uint8_t *lz4k_repeat_end(
+    const uint8_t *q,
+    const uint8_t *r,
+    const uint8_t *const in_end_safe,
+    const uint8_t *const in_end);
+
+#endif /* _LZ4K_ENCODE_PRIVATE_H */
+
diff --git a/lib/lz4k/lz4k_private.h b/lib/lz4k/lz4k_private.h
new file mode 100644
index 000000000000..2a8f4b37dc74
--- /dev/null
+++ b/lib/lz4k/lz4k_private.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights 
reserved.
+ * Description: LZ4K compression algorithm
+ * Author: Aleksei Romanovskii  aleksei.romanovskii@huawei.com
+ * Created: 2020-03-25
+ */
+
+#ifndef _LZ4K_PRIVATE_H
+#define _LZ4K_PRIVATE_H
+
+#if !defined(__KERNEL__)
+
+#include "lz4k.h"
+#include <stdint.h> /* uint*_t */
+#define __STDC_WANT_LIB_EXT1__ 1
+#include <string.h> /* memcpy() */
+
+#define likely(e) __builtin_expect(e, 1)
+#define unlikely(e) __builtin_expect(e, 0)
+
+#else /* __KERNEL__ */
+
+#include <linux/lz4k.h>
+#define __STDC_WANT_LIB_EXT1__ 1
+#include <linux/string.h> /* memcpy() */
+#include <linux/types.h> /* uint8_t, int8_t, uint16_t, int16_t,
+uint32_t, int32_t, uint64_t, int64_t */
+#include <stddef.h>
+
+typedef uint64_t uint_fast32_t;
+typedef int64_t int_fast32_t;
+
+#endif /* __KERNEL__ */
+
+#if defined(__GNUC__) && (__GNUC__>=4)
+#define LZ4K_WITH_GCC_INTRINSICS
+#endif
+
+#if !defined(__GNUC__)
+#define __builtin_expect(e, v) (e)
+#endif /* defined(__GNUC__) */
+
+enum {
+    BYTE_BITS = 8,
+    BYTE_BITS_LOG2 = 3,
+    BYTE_MAX = 255U,
+    REPEAT_MIN = 4,
+    TAG_BYTES_MAX = 3,
+    TAG_BITS_MAX  = TAG_BYTES_MAX * 8,
+    BLOCK_4KB_LOG2  = 12,
+    BLOCK_8KB_LOG2  = 13,
+    BLOCK_16KB_LOG2 = 14,
+    BLOCK_32KB_LOG2 = 15,
+    BLOCK_64KB_LOG2 = 16
+};
+
+static inline uint32_t mask(uint_fast32_t log2)
+{
+    return (1U << log2) - 1U;
+}
+
+static inline uint64_t mask64(uint_fast32_t log2)
+{
+    return (1ULL << log2) - 1ULL;
+}
+
+#if defined LZ4K_WITH_GCC_INTRINSICS
+static inline int most_significant_bit_of(uint64_t u)
+{
+    return (int)(__builtin_expect((u) == 0, false) ?
+             -1 : (int)(31 ^ (uint32_t)__builtin_clz((unsigned)(u))));
+}
+#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */
+#error undefined most_significant_bit_of(unsigned u)
+#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */
+
+static inline uint64_t round_up_to_log2(uint64_t u, uint8_t log2)
+{
+    return (uint64_t)((u + mask64(log2)) & ~mask64(log2));
+}
+
+static inline uint64_t round_up_to_power_of2(uint64_t u)
+{
+    const int_fast32_t msb = most_significant_bit_of(u);
+    return round_up_to_log2(u, (uint8_t)msb);
+}
+
+static inline void m_copy(void *dst, const void *src, size_t total)
+{
+#if defined(__STDC_LIB_EXT1__)
+    (void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to 
avoid bot errors */
+#else
+    (void)__builtin_memcpy(dst, src, total);
+#endif
+}
+
+static inline void m_set(void *dst, uint8_t value, size_t total)
+{
+#if defined(__STDC_LIB_EXT1__)
+    (void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 
to avoid bot errors */
+#else
+    (void)__builtin_memset(dst, value, total);
+#endif
+}
+
+static inline uint32_t read4_at(const void *p)
+{
+    uint32_t result;
+    m_copy(&result, p, sizeof(result));
+    return result;
+}
+
+static inline uint64_t read8_at(const void *p)
+{
+    uint64_t result;
+    m_copy(&result, p, sizeof(result));
+    return result;
+}
+
+static inline bool equal4(const uint8_t *const q, const uint8_t 
*const r)
+{
+    return read4_at(q) == read4_at(r);
+}
+
+static inline bool equal3(const uint8_t *const q, const uint8_t 
*const r)
+{
+    return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS);
+}
+
+static inline uint_fast32_t hash24v(const uint64_t r, uint32_t shift)
+{
+    const uint32_t m = 3266489917U;
+    return (((uint32_t)r << BYTE_BITS) * m) >> (32 - shift);
+}
+
+static inline uint_fast32_t hash24(const uint8_t *r, uint32_t shift)
+{
+    return hash24v(read4_at(r), shift);
+}
+
+static inline uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift)
+{
+    const uint32_t m = 3266489917U;
+    return ((uint32_t)r * m) >> (32 - shift);
+}
+
+static inline uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift)
+{
+    return hash32v_2(read4_at(r), shift);
+}
+
+static inline uint_fast32_t hash32v(const uint64_t r, uint32_t shift)
+{
+    const uint32_t m = 2654435761U;
+    return ((uint32_t)r * m) >> (32 - shift);
+}
+
+static inline uint_fast32_t hash32(const uint8_t *r, uint32_t shift)
+{
+    return hash32v(read4_at(r), shift);
+}
+
+static inline uint_fast32_t hash64v_5b(const uint64_t r, uint32_t 
shift)
+{
+    const uint64_t m = 889523592379ULL;
+    return (uint32_t)(((r << 24) * m) >> (64 - shift));
+}
+
+static inline uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift)
+{
+    return hash64v_5b(read8_at(r), shift);
+}
+
+static inline uint_fast32_t hash64v_6b(const uint64_t r, uint32_t 
shift)
+{
+    const uint64_t m = 227718039650203ULL;
+    return (uint32_t)(((r << 16) * m) >> (64 - shift));
+}
+
+static inline uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift)
+{
+    return hash64v_6b(read8_at(r), shift);
+}
+
+static inline uint_fast32_t hash64v_7b(const uint64_t r, uint32_t 
shift)
+{
+    const uint64_t m = 58295818150454627ULL;
+    return (uint32_t)(((r << 8) * m) >> (64 - shift));
+}
+
+static inline uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift)
+{
+    return hash64v_7b(read8_at(r), shift);
+}
+
+static inline uint_fast32_t hash64v_8b(const uint64_t r, uint32_t 
shift)
+{
+    const uint64_t m = 2870177450012600261ULL;
+    return (uint32_t)((r * m) >> (64 - shift));
+}
+
+static inline uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift)
+{
+    return hash64v_8b(read8_at(r), shift);
+}
+
+static inline void while_lt_copy_x(
+    uint8_t *dst,
+    const uint8_t *src,
+    const uint8_t *dst_end,
+    const size_t copy_min)
+{
+    for (; dst < dst_end; dst += copy_min, src += copy_min)
+        m_copy(dst, src, copy_min);
+}
+
+static inline void copy_x_while_lt(
+    uint8_t *dst,
+    const uint8_t *src,
+    const uint8_t *dst_end,
+    const size_t copy_min)
+{
+    m_copy(dst, src, copy_min);
+    while (dst + copy_min < dst_end)
+        m_copy(dst += copy_min, src += copy_min, copy_min);
+}
+
+static inline void copy_x_while_total(
+    uint8_t *dst,
+    const uint8_t *src,
+    size_t total,
+    const size_t copy_min)
+{
+    m_copy(dst, src, copy_min);
+    for (; total > copy_min; total-= copy_min)
+        m_copy(dst += copy_min, src += copy_min, copy_min);
+}
+
+static inline void copy_2x(
+    uint8_t *dst,
+    const uint8_t *src,
+    const size_t copy_min)
+{
+    m_copy(dst, src, copy_min);
+    m_copy(dst + copy_min, src + copy_min, copy_min);
+}
+
+static inline void copy_2x_as_x2_while_lt(
+    uint8_t *dst,
+    const uint8_t *src,
+    const uint8_t *dst_end,
+    const size_t copy_min)
+{
+    copy_2x(dst, src, copy_min);
+    while (dst + (copy_min << 1) < dst_end)
+        copy_2x(dst += (copy_min << 1), src += (copy_min << 1), 
copy_min);
+}
+
+static inline void while_lt_copy_2x_as_x2(
+    uint8_t *dst,
+    const uint8_t *src,
+    const uint8_t *dst_end,
+    const size_t copy_min)
+{
+    for (; dst < dst_end; dst += (copy_min << 1), src += (copy_min 
<< 1))
+        copy_2x(dst, src, copy_min);
+}
+
+#endif /* _LZ4K_PRIVATE_H */

    

Tu Jinjiang

tags

participants (1)