From: Vlastimil Babka <vbabka@suse.cz>
mainline inclusion
from mainline-5.4-rc3
commit 59bb47985c1db229ccff8c5deebecd54fc77d2a9
category: bugfix
bugzilla: 51349
CVE: NA
-------------------------------------------------
In most configurations, kmalloc() happens to return naturally aligned (i.e. aligned to the block size itself) blocks for power of two sizes.
That means some kmalloc() users might unknowingly rely on that alignment, until stuff breaks when the kernel is built with e.g. CONFIG_SLUB_DEBUG or CONFIG_SLOB and blocks stop being aligned. Then developers have to devise workarounds such as their own kmem caches with specified alignment [1], which is not always practical, as recently evidenced in [2].
The topic has been discussed at LSF/MM 2019 [3]. Adding a 'kmalloc_aligned()' variant would not help with code unknowingly relying on the implicit alignment. For slab implementations it would either require creating more kmalloc caches, or allocating a larger size and only giving back part of it. That would be wasteful, especially with a generic alignment parameter (in contrast with a fixed alignment to size).
Ideally we should provide mm users with what they need without difficult workarounds or their own reimplementations, so let's make the kmalloc() alignment to size explicitly guaranteed for power-of-two sizes under all configurations; a small sketch illustrating the resulting guarantee follows the per-allocator notes below. What does this mean for the three available allocators?
* SLAB object layout happens to be mostly unchanged by the patch. The implicitly provided alignment could be compromised with CONFIG_DEBUG_SLAB due to redzoning; however, SLAB disables redzoning for caches with alignment larger than unsigned long long. Practically, on at least x86, this includes the kmalloc caches, as they use cache line alignment, which is larger than that. Still, this patch ensures alignment on all arches and for all cache sizes.
* SLUB layout is also unchanged unless redzoning is enabled through CONFIG_SLUB_DEBUG and a boot parameter for the particular kmalloc cache. With this patch, explicit alignment is guaranteed with redzoning as well. This will result in more memory being wasted, but that should be acceptable in a debugging scenario.
* SLOB has no implicit alignment, so this patch adds it explicitly for kmalloc(). The potential downside is increased fragmentation. While pathological allocation scenarios are certainly possible, in my testing, after booting an x86_64 kernel+userspace with virtme, around 16MB of memory was consumed by slab pages both before and after the patch, with the difference in the noise.
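To make the guarantee concrete, here is a minimal, hypothetical sanity-check sketch (not part of this patch; the module name and the size range walked are illustrative assumptions): with the patch applied, kmalloc() of a power-of-two size returns a pointer aligned to that size, regardless of which of the three allocators is configured.

  /* Hypothetical test module: illustrates the alignment guarantee only. */
  #include <linux/module.h>
  #include <linux/slab.h>

  static int __init kmalloc_align_demo_init(void)
  {
          size_t size;

          /* Walk the power-of-two sizes served by the kmalloc caches. */
          for (size = 8; size <= PAGE_SIZE; size <<= 1) {
                  void *p = kmalloc(size, GFP_KERNEL);

                  if (!p)
                          return -ENOMEM;
                  /* With this patch, p is aligned to 'size' itself. */
                  WARN_ON((unsigned long)p & (size - 1));
                  kfree(p);
          }
          return 0;
  }

  static void __exit kmalloc_align_demo_exit(void)
  {
  }

  module_init(kmalloc_align_demo_init);
  module_exit(kmalloc_align_demo_exit);
  MODULE_LICENSE("GPL");

Note that only power-of-two sizes get natural alignment; other sizes keep the existing ARCH_KMALLOC_MINALIGN guarantee, as documented in the slab.h hunk below.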
[1] https://lore.kernel.org/linux-btrfs/c3157c8e8e0e7588312b40c853f65c02fe6c957a...
[2] https://lore.kernel.org/linux-fsdevel/20190225040904.5557-1-ming.lei@redhat....
[3] https://lwn.net/Articles/787740/
[akpm@linux-foundation.org: documentation fixlet, per Matthew]
Link: http://lkml.kernel.org/r/20190826111627.7505-3-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J . Wong" <darrick.wong@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 59bb47985c1db229ccff8c5deebecd54fc77d2a9)
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 include/linux/slab.h |  4 ++++
 mm/slab_common.c     | 11 ++++++++++-
 mm/slob.c            | 41 ++++++++++++++++++++++++++++++-----------
 3 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d6393413ef09..e4f5d7f88c16 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -457,6 +457,10 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  * kmalloc is the normal method of allocating memory
  * for objects smaller than page size in the kernel.
  *
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least to the size.
+ *
  * The @flags argument may be one of:
  *
  * %GFP_USER - Allocate memory on behalf of user. May sleep.
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a94b9981eb17..9b8d6a9f1e97 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -970,10 +970,19 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
                 unsigned int useroffset, unsigned int usersize)
 {
         int err;
+        unsigned int align = ARCH_KMALLOC_MINALIGN;
 
         s->name = name;
         s->size = s->object_size = size;
-        s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+        /*
+         * For power of two sizes, guarantee natural alignment for kmalloc
+         * caches, regardless of SL*B debugging options.
+         */
+        if (is_power_of_2(size))
+                align = max(align, size);
+        s->align = calculate_alignment(flags, align, size);
+
         s->useroffset = useroffset;
         s->usersize = usersize;
 
diff --git a/mm/slob.c b/mm/slob.c
index 307c2c9feb44..fdf284009be9 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -215,7 +215,7 @@ static void slob_free_pages(void *b, int order)
 /*
  * Allocate a slob block within a given slob_page sp.
  */
-static void *slob_page_alloc(struct page *sp, size_t size, int align)
+static void *slob_page_alloc(struct page *sp, size_t size, int align, int align_offset)
 {
         slob_t *prev, *cur, *aligned = NULL;
         int delta = 0, units = SLOB_UNITS(size);
@@ -223,8 +223,17 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align)
         for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
                 slobidx_t avail = slob_units(cur);
 
+                /*
+                 * 'aligned' will hold the address of the slob block so that the
+                 * address 'aligned'+'align_offset' is aligned according to the
+                 * 'align' parameter. This is for kmalloc() which prepends the
+                 * allocated block with its size, so that the block itself is
+                 * aligned when needed.
+                 */
                 if (align) {
-                        aligned = (slob_t *)ALIGN((unsigned long)cur, align);
+                        aligned = (slob_t *)
+                                (ALIGN((unsigned long)cur + align_offset, align)
+                                 - align_offset);
                         delta = aligned - cur;
                 }
                 if (avail >= units + delta) { /* room enough? */
@@ -266,7 +275,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align)
 /*
  * slob_alloc: entry point into the slob allocator.
  */
-static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
+                                                        int align_offset)
 {
         struct page *sp;
         struct list_head *prev;
@@ -298,7 +308,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
 
                 /* Attempt to alloc */
                 prev = sp->lru.prev;
-                b = slob_page_alloc(sp, size, align);
+                b = slob_page_alloc(sp, size, align, align_offset);
                 if (!b)
                         continue;
 
@@ -326,7 +336,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
                 INIT_LIST_HEAD(&sp->lru);
                 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
                 set_slob_page_free(sp, slob_list);
-                b = slob_page_alloc(sp, size, align);
+                b = slob_page_alloc(sp, size, align, align_offset);
                 BUG_ON(!b);
                 spin_unlock_irqrestore(&slob_lock, flags);
         }
@@ -428,7 +438,7 @@ static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 {
         unsigned int *m;
-        int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+        int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
         void *ret;
 
         gfp &= gfp_allowed_mask;
@@ -436,19 +446,28 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
         fs_reclaim_acquire(gfp);
         fs_reclaim_release(gfp);
 
-        if (size < PAGE_SIZE - align) {
+        if (size < PAGE_SIZE - minalign) {
+                int align = minalign;
+
+                /*
+                 * For power of two sizes, guarantee natural alignment for
+                 * kmalloc()'d objects.
+                 */
+                if (is_power_of_2(size))
+                        align = max(minalign, (int) size);
+
                 if (!size)
                         return ZERO_SIZE_PTR;
 
-                m = slob_alloc(size + align, gfp, align, node);
+                m = slob_alloc(size + minalign, gfp, align, node, minalign);
 
                 if (!m)
                         return NULL;
                 *m = size;
-                ret = (void *)m + align;
+                ret = (void *)m + minalign;
 
                 trace_kmalloc_node(caller, ret,
-                                   size, size + align, gfp, node);
+                                   size, size + minalign, gfp, node);
         } else {
                 unsigned int order = get_order(size);
 
@@ -544,7 +563,7 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
         fs_reclaim_release(flags);
 
         if (c->size < PAGE_SIZE) {
-                b = slob_alloc(c->size, flags, c->align, node);
+                b = slob_alloc(c->size, flags, c->align, node, 0);
                 trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
                                             SLOB_UNITS(c->size) * SLOB_UNIT,
                                             flags, node);
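For reference, here is a minimal userspace sketch of the align_offset arithmetic used in slob_page_alloc() above; it only assumes a plain C environment and a hand-rolled ALIGN() macro that mirrors the kernel's rounding for power-of-two alignments. The point it demonstrates: the block is placed so that block + align_offset, i.e. the address handed back to the kmalloc() caller after the size header, honours the requested alignment.

  /* Userspace illustration of the SLOB align_offset placement. */
  #include <assert.h>
  #include <stdio.h>

  /* Same rounding the kernel's ALIGN() performs for power-of-two 'a'. */
  #define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

  int main(void)
  {
          unsigned long align = 64;       /* e.g. kmalloc(64) wants 64-byte alignment */
          unsigned long align_offset = 8; /* SLOB's size header (minalign) */
          unsigned long cur;

          /* 'cur' stands in for candidate free-block addresses. */
          for (cur = 4096; cur < 4096 + 256; cur += 16) {
                  unsigned long block =
                          ALIGN(cur + align_offset, align) - align_offset;

                  /* The caller-visible object starts at block + align_offset. */
                  assert(((block + align_offset) & (align - 1)) == 0);
                  /* The block never starts before the free chunk itself. */
                  assert(block >= cur);
          }
          printf("align_offset placement holds\n");
          return 0;
  }

This also matches the last hunk: slob_alloc_node() passes align_offset == 0 because kmem_cache objects carry no size header, so the object address itself is what must be aligned.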