From: Sergey Senozhatsky senozhatsky@chromium.org
mainline inclusion from mainline-v6.2-rc1 commit 84b33bf7888975d28c0e57011b75c445279c60ec category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7TWVA CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------
Allow zram to recompress (using secondary compression streams) pages.
Re-compression algorithms (we support up to 3 at this stage) are selected via recomp_algorithm:
echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
Please read documentation for more details.
We support several recompression modes:
1) IDLE pages recompression is activated by `idle` mode
echo "type=idle" > /sys/block/zram0/recompress
2) Since there may be many idle pages user-space may pass a size threshold value (in bytes) and we will recompress pages only of equal or greater size:
echo "threshold=888" > /sys/block/zram0/recompress
3) HUGE pages recompression is activated by `huge` mode
echo "type=huge" > /sys/block/zram0/recompress
4) HUGE_IDLE pages recompression is activated by `huge_idle` mode
echo "type=huge_idle" > /sys/block/zram0/recompress
[senozhatsky@chromium.org: we should always zero out err variable in recompress loop[ Link: https://lkml.kernel.org/r/20221110143423.3250790-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20221109115047.2921851-5-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky senozhatsky@chromium.org Acked-by: Minchan Kim minchan@kernel.org Cc: Nathan Chancellor nathan@kernel.org Cc: Alexey Romanov avromanov@sberdevices.ru Cc: Nhat Pham nphamcs@gmail.com Cc: Nitin Gupta ngupta@vflare.org Cc: Suleiman Souhlal suleiman@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: drivers/block/zram/zram_drv.h
Signed-off-by: Jinjiang Tu tujinjiang@huawei.com --- drivers/block/zram/Kconfig | 9 ++ drivers/block/zram/zram_drv.c | 264 +++++++++++++++++++++++++++++++++- drivers/block/zram/zram_drv.h | 7 + 3 files changed, 277 insertions(+), 3 deletions(-)
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index fe7a4b7d30cf..a23139b7e9c5 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -37,3 +37,12 @@ config ZRAM_MEMORY_TRACKING /sys/kernel/debug/zram/zramX/block_state.
See Documentation/admin-guide/blockdev/zram.rst for more information. + +config ZRAM_MULTI_COMP + bool "Enable multiple compression streams" + depends on ZRAM + help + This will enable multi-compression streams, so that ZRAM can + re-compress pages using a potentially slower but more effective + compression algorithm. Note, that IDLE page recompression + requires ZRAM_MEMORY_TRACKING. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4ebd60a1ae37..2535702add00 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -157,6 +157,25 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif
+static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) +{ + prio &= ZRAM_COMP_PRIORITY_MASK; + /* + * Clear previous priority value first, in case if we recompress + * further an already recompressed page + */ + zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << + ZRAM_COMP_PRIORITY_BIT1); + zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); +} + +static inline u32 zram_get_priority(struct zram *zram, u32 index) +{ + u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; + + return prio & ZRAM_COMP_PRIORITY_MASK; +} + /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -1285,6 +1304,11 @@ static void zram_free_page(struct zram *zram, size_t index) atomic64_dec(&zram->stats.huge_pages); }
+ if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); + + zram_set_priority(zram, index, 0); + if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); free_block_bdev(zram, zram_get_element(zram, index)); @@ -1345,6 +1369,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, unsigned long handle; unsigned int size; void *src, *dst; + u32 prio; int ret;
handle = zram_get_handle(zram, index); @@ -1361,8 +1386,10 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
size = zram_get_obj_size(zram, index);
- if (size != PAGE_SIZE) - zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); + if (size != PAGE_SIZE) { + prio = zram_get_priority(zram, index); + zstrm = zcomp_stream_get(zram->comps[prio]); + }
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { @@ -1374,7 +1401,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zram->comps[prio]); } zs_unmap_object(zram->mem_pool, handle); return ret; @@ -1599,6 +1626,235 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, return ret; }
+#ifdef CONFIG_ZRAM_MULTI_COMP +/* + * This function will decompress (unless it's ZRAM_HUGE) the page and then + * attempt to compress it using provided compression algorithm priority + * (which is potentially more effective). + * + * Corresponding ZRAM slot should be locked. + */ +static int zram_recompress(struct zram *zram, u32 index, struct page *page, + u32 threshold, u32 prio, u32 prio_max) +{ + struct zcomp_strm *zstrm = NULL; + unsigned long handle_old; + unsigned long handle_new; + unsigned int comp_len_old; + unsigned int comp_len_new; + void *src, *dst; + int ret; + + handle_old = zram_get_handle(zram, index); + if (!handle_old) + return -EINVAL; + + comp_len_old = zram_get_obj_size(zram, index); + /* + * Do not recompress objects that are already "small enough". + */ + if (comp_len_old < threshold) + return 0; + + ret = zram_read_from_zspool(zram, page, index); + if (ret) + return ret; + + /* + * Iterate the secondary comp algorithms list (in order of priority) + * and try to recompress the page. + */ + for (; prio < prio_max; prio++) { + if (!zram->comps[prio]) + continue; + + /* + * Skip if the object is already re-compressed with a higher + * priority algorithm (or same algorithm). + */ + if (prio <= zram_get_priority(zram, index)) + continue; + + zstrm = zcomp_stream_get(zram->comps[prio]); + src = kmap_atomic(page); + ret = zcomp_compress(zstrm, src, &comp_len_new); + kunmap_atomic(src); + + if (ret) { + zcomp_stream_put(zram->comps[prio]); + return ret; + } + + /* Continue until we make progress */ + if (comp_len_new >= huge_class_size || + comp_len_new >= comp_len_old || + (threshold && comp_len_new >= threshold)) { + zcomp_stream_put(zram->comps[prio]); + continue; + } + + /* Recompression was successful so break out */ + break; + } + + /* + * We did not try to recompress, e.g. when we have only one + * secondary algorithm and the page is already recompressed + * using that algorithm + */ + if (!zstrm) + return 0; + + /* + * All secondary algorithms failed to re-compress the page in a way + * that would save memory, mark the object as incompressible so that + * we will not try to compress it again. + */ + if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) { + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + return 0; + } + + /* Successful recompression but above threshold */ + if (threshold && comp_len_new >= threshold) + return 0; + + /* + * No direct reclaim (slow path) for handle allocation and no + * re-compression attempt (unlike in __zram_bvec_write()) since + * we already have stored that object in zsmalloc. If we cannot + * alloc memory for recompressed object then we bail out and + * simply keep the old (existing) object in zsmalloc. + */ + handle_new = zs_malloc(zram->mem_pool, comp_len_new, + __GFP_KSWAPD_RECLAIM | + __GFP_NOWARN | + __GFP_HIGHMEM | + __GFP_MOVABLE); + if (IS_ERR_VALUE(handle_new)) { + zcomp_stream_put(zram->comps[prio]); + return PTR_ERR((void *)handle_new); + } + + dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); + memcpy(dst, zstrm->buffer, comp_len_new); + zcomp_stream_put(zram->comps[prio]); + + zs_unmap_object(zram->mem_pool, handle_new); + + zram_free_page(zram, index); + zram_set_handle(zram, index, handle_new); + zram_set_obj_size(zram, index, comp_len_new); + zram_set_priority(zram, index, prio); + + atomic64_add(comp_len_new, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); + + return 0; +} + +#define RECOMPRESS_IDLE (1 << 0) +#define RECOMPRESS_HUGE (1 << 1) + +static ssize_t recompress_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP; + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + char *args, *param, *val; + unsigned long index; + struct page *page; + ssize_t ret; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, ¶m, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "type")) { + if (!strcmp(val, "idle")) + mode = RECOMPRESS_IDLE; + if (!strcmp(val, "huge")) + mode = RECOMPRESS_HUGE; + if (!strcmp(val, "huge_idle")) + mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; + continue; + } + + if (!strcmp(param, "threshold")) { + /* + * We will re-compress only idle objects equal or + * greater in size than watermark. + */ + ret = kstrtouint(val, 10, &threshold); + if (ret) + return ret; + continue; + } + } + + if (threshold >= PAGE_SIZE) + return -EINVAL; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + ret = -EINVAL; + goto release_init_lock; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto release_init_lock; + } + + ret = len; + for (index = 0; index < nr_pages; index++) { + int err = 0; + + zram_slot_lock(zram, index); + + if (!zram_allocated(zram, index)) + goto next; + + if (mode & RECOMPRESS_IDLE && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + + if (mode & RECOMPRESS_HUGE && + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + + if (zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_UNDER_WB) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + goto next; + + err = zram_recompress(zram, index, page, threshold, + prio, ZRAM_MAX_COMPS); +next: + zram_slot_unlock(zram, index); + if (err) { + ret = err; + break; + } + + cond_resched(); + } + + __free_page(page); + +release_init_lock: + up_read(&zram->init_lock); + return ret; +} +#endif + /* * zram_bio_discard - handler on discard request * @index: physical block index in PAGE_SIZE units @@ -1999,6 +2255,7 @@ static DEVICE_ATTR_RW(writeback_limit_enable); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); +static DEVICE_ATTR_WO(recompress); #endif
static struct attribute *zram_disk_attrs[] = { @@ -2025,6 +2282,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_debug_stat.attr, #ifdef CONFIG_ZRAM_MULTI_COMP &dev_attr_recomp_algorithm.attr, + &dev_attr_recompress.attr, #endif NULL, }; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index a3fcd6b21091..c56e216f928d 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -41,6 +41,9 @@ */ #define ZRAM_FLAG_SHIFT 24
+/* Only 2 bits are allowed for comp priority index */ +#define ZRAM_COMP_PRIORITY_MASK 0x3 + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* zram slot is locked */ @@ -50,6 +53,10 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ + ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */ + + ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */ + ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */
__NR_ZRAM_PAGEFLAGS, };