Kernel

kernel@openeuler.org

  • 61 participants
  • 22260 discussions
[PATCH OLK-6.6 V2] dm-thin: Add support for split cache
by Zhang Zekun 26 Dec '25

hulk inclusion category: feature bugzilla: NA CVE: NA -------------------------------------------------------- The cache of dm-thin is common used for first level btree and the second level btree, however the first level btree has less dm-buffer than the second level btree, which might cause extra look up steps for the first level btree when accessing the cache of the dm pool. Thus, add support for spliting the first level btree and the second level btree' cache, which might speed up the look up process of the first level btree routine. Signed-off-by: Zhang Zekun <zhangzekun11(a)huawei.com> --- drivers/md/dm-bufio.c | 97 ++++++++++++++----- drivers/md/dm-thin-metadata.c | 13 ++- drivers/md/dm.h | 18 +++- drivers/md/persistent-data/dm-block-manager.c | 11 +++ drivers/md/persistent-data/dm-block-manager.h | 2 + include/linux/dm-bufio.h | 4 + 6 files changed, 120 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 8ce77aec0..6edfe8239 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -254,9 +254,9 @@ enum evict_result { ER_STOP, /* stop looking for something to evict */ }; -typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context); +typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context, void *bc); -static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep) +static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep, void *bc) { unsigned long tested = 0; struct list_head *h = lru->cursor; @@ -276,7 +276,7 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con atomic_set(&le->referenced, 0); } else { tested++; - switch (pred(le, context)) { + switch (pred(le, context, bc)) { case ER_EVICT: /* * Adjust the cursor, so we start the next @@ -362,6 +362,7 @@ struct dm_buffer { unsigned int stack_len; unsigned long stack_entries[MAX_STACK]; #endif + bool special; }; /*--------------------------------------------------------------*/ @@ -399,12 +400,18 @@ struct dm_buffer_cache { * on the locks. 
*/ unsigned int num_locks; + unsigned long special; bool no_sleep; struct buffer_tree trees[]; }; static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled); +static inline unsigned int cache_index_special(sector_t block, unsigned int num_locks, unsigned long special) +{ + return dm_hash_locks_index_special(block, num_locks, special); +} + static inline unsigned int cache_index(sector_t block, unsigned int num_locks) { return dm_hash_locks_index(block, num_locks); @@ -413,33 +420,33 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks) static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block) { if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) - read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + read_lock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock); else - down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); + down_read(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock); } static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block) { if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) - read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + read_unlock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock); else - up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); + up_read(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock); } static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block) { if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) - write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + write_lock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock); else - down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); + down_write(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock); } static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block) { if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) - write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + write_unlock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock); else - up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); + up_write(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock); } /* @@ -506,9 +513,9 @@ static void lh_exit(struct lock_history *lh) * Named 'next' because there is no corresponding * 'up/unlock' call since it's done automatically. 
*/ -static void lh_next(struct lock_history *lh, sector_t b) +static void lh_next(struct lock_history *lh, sector_t b, unsigned long special) { - unsigned int index = cache_index(b, lh->no_previous); /* no_previous is num_locks */ + unsigned int index = cache_index_special(b, lh->no_previous, special); /* no_previous is num_locks */ if (lh->previous != lh->no_previous) { if (lh->previous != index) { @@ -614,12 +621,23 @@ static void __cache_inc_buffer(struct dm_buffer *b) WRITE_ONCE(b->last_accessed, jiffies); } +static struct dm_buffer *cache_get_noref(struct dm_buffer_cache *bc, sector_t block) +{ + struct dm_buffer *b; + + cache_read_lock(bc, block); + b = __cache_get(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].root, block); + cache_read_unlock(bc, block); + + return b; +} + static struct dm_buffer *cache_get(struct dm_buffer_cache *bc, sector_t block) { struct dm_buffer *b; cache_read_lock(bc, block); - b = __cache_get(&bc->trees[cache_index(block, bc->num_locks)].root, block); + b = __cache_get(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].root, block); if (b) { lru_reference(&b->lru); __cache_inc_buffer(b); @@ -666,12 +684,13 @@ struct evict_wrapper { * Wraps the buffer predicate turning it into an lru predicate. Adds * extra test for hold_count. */ -static enum evict_result __evict_pred(struct lru_entry *le, void *context) +static enum evict_result __evict_pred(struct lru_entry *le, void *context, void *bc) { struct evict_wrapper *w = context; struct dm_buffer *b = le_to_buffer(le); + struct dm_buffer_cache *bcc = (struct dm_buffer_cache *)bc; - lh_next(w->lh, b->block); + lh_next(w->lh, b->block, b->special ? b->block : bcc->special); if (atomic_read(&b->hold_count)) return ER_DONT_EVICT; @@ -687,13 +706,13 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode struct lru_entry *le; struct dm_buffer *b; - le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep); + le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep, (void*)bc); if (!le) return NULL; b = le_to_buffer(le); /* __evict_pred will have locked the appropriate tree. */ - rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root); + rb_erase(&b->node, &bc->trees[cache_index_special(b->block, bc->num_locks, b->special? 
b->block : bc->special)].root); return b; } @@ -741,7 +760,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_ struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context}; while (true) { - le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep); + le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep, (void *)bc); if (!le) break; @@ -792,7 +811,7 @@ static void __cache_iterate(struct dm_buffer_cache *bc, int list_mode, do { struct dm_buffer *b = le_to_buffer(le); - lh_next(lh, b->block); + lh_next(lh, b->block, bc->special); switch (fn(b, context)) { case IT_NEXT: @@ -858,8 +877,7 @@ static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b) return false; cache_write_lock(bc, b->block); - BUG_ON(atomic_read(&b->hold_count) != 1); - r = __cache_insert(&bc->trees[cache_index(b->block, bc->num_locks)].root, b); + r = __cache_insert(&bc->trees[cache_index_special(b->block, bc->num_locks, bc->special)].root, b); if (r) lru_insert(&bc->lru[b->list_mode], &b->lru); cache_write_unlock(bc, b->block); @@ -868,6 +886,20 @@ static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b) } /*--------------*/ +static bool cache_remove_nocheck(struct dm_buffer_cache *bc, struct dm_buffer *b) +{ + bool r; + + cache_write_lock(bc, b->block); + + r = true; + rb_erase(&b->node, &bc->trees[cache_index_special(b->block, bc->num_locks, bc->special)].root); + lru_remove(&bc->lru[b->list_mode], &b->lru); + + cache_write_unlock(bc, b->block); + + return r; +} /* * Removes buffer from cache, ownership of the buffer passes back to the caller. @@ -885,7 +917,7 @@ static bool cache_remove(struct dm_buffer_cache *bc, struct dm_buffer *b) r = false; } else { r = true; - rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root); + rb_erase(&b->node, &bc->trees[cache_index_special(b->block, bc->num_locks, bc->special)].root); lru_remove(&bc->lru[b->list_mode], &b->lru); } @@ -1019,6 +1051,12 @@ struct dm_bufio_client { struct dm_buffer_cache cache; /* must be last member */ }; +void dm_setup_buffer_cache(struct dm_bufio_client *bc, unsigned long block) +{ + bc->cache.special = block; +} +EXPORT_SYMBOL_GPL(dm_setup_buffer_cache); + /*----------------------------------------------------------------*/ #define dm_bufio_in_request() (!!current->bio_list) @@ -1792,6 +1830,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, b->read_error = 0; b->write_error = 0; b->list_mode = LIST_CLEAN; + b->special = (block == c->cache.special); if (nf == NF_FRESH) b->state = 0; @@ -2642,6 +2681,18 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start) } EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset); +void dm_move_cache(struct dm_bufio_client *bc, unsigned long block) +{ + struct dm_buffer *b; + + b = cache_get_noref(&bc->cache, block); + cache_remove_nocheck(&bc->cache, b); + bc->cache.special = block; + b->special = true; + cache_insert(&bc->cache, b); +} +EXPORT_SYMBOL_GPL(dm_move_cache); + /*--------------------------------------------------------------*/ static unsigned int get_max_age_hz(void) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 6022189c1..210404a57 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -629,6 +629,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd) r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) goto bad_cleanup_nb_tm; + dm_move_bm_cache(pmd->bm, pmd->root); r = 
dm_btree_empty(&pmd->details_info, &pmd->details_root); if (r < 0) { @@ -747,6 +748,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) * root to avoid accessing broken btree. */ pmd->root = le64_to_cpu(disk_super->data_mapping_root); + dm_setup_bm_cache(pmd->bm, pmd->root); pmd->details_root = le64_to_cpu(disk_super->device_details_root); __setup_btree_details(pmd); @@ -837,6 +839,7 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); pmd->time = le32_to_cpu(disk_super->time); pmd->root = le64_to_cpu(disk_super->data_mapping_root); + dm_setup_bm_cache(pmd->bm, pmd->root); pmd->details_root = le64_to_cpu(disk_super->device_details_root); pmd->trans_id = le64_to_cpu(disk_super->trans_id); pmd->flags = le32_to_cpu(disk_super->flags); @@ -1132,10 +1135,12 @@ static int __create_thin(struct dm_pool_metadata *pmd, dm_btree_del(&pmd->bl_info, dev_root); return r; } + dm_move_bm_cache(pmd->bm, pmd->root); r = __open_device(pmd, dev, 1, &td); if (r) { dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); + dm_move_bm_cache(pmd->bm, pmd->root); dm_btree_del(&pmd->bl_info, dev_root); return r; } @@ -1272,6 +1277,7 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); if (r) return r; + dm_move_bm_cache(pmd->bm, pmd->root); return 0; } @@ -1678,6 +1684,8 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; + dm_move_bm_cache(pmd->bm, pmd->root); + td->changed = true; if (inserted) td->mapped_blocks++; @@ -1723,6 +1731,7 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_ r = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root); if (r) return r; + dm_move_bm_cache(pmd->bm, pmd->root); /* * Remove leaves stops at the first unmapped entry, so we have to @@ -1754,7 +1763,9 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_ */ value = cpu_to_le64(mapping_root); __dm_bless_for_disk(&value); - return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root); + r = dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root); + dm_move_bm_cache(pmd->bm, pmd->root); + return r; } int dm_thin_remove_range(struct dm_thin_device *td, diff --git a/drivers/md/dm.h b/drivers/md/dm.h index f682295af..3d0c9f860 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -232,12 +232,28 @@ static inline unsigned int dm_num_hash_locks(void) { unsigned int num_locks = roundup_pow_of_two(num_online_cpus()) << 1; - return min_t(unsigned int, num_locks, DM_HASH_LOCKS_MAX); + return min_t(unsigned int, num_locks, DM_HASH_LOCKS_MAX) + 32; } #define DM_HASH_LOCKS_MULT 4294967291ULL #define DM_HASH_LOCKS_SHIFT 6 +static inline unsigned int dm_hash_locks_index_special(sector_t block, + unsigned int num_locks, + unsigned long special) +{ + + + sector_t h1 = (block * DM_HASH_LOCKS_MULT) >> DM_HASH_LOCKS_SHIFT; + sector_t h2 = h1 >> DM_HASH_LOCKS_SHIFT; + + if (block == special) + return ((h1 ^ h2) & 31) + num_locks - 32; + + return (h1 ^ h2) & (num_locks - 33); +} + + static inline unsigned int dm_hash_locks_index(sector_t block, unsigned int num_locks) { diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0e010e120..da560f825 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -414,6 +414,17 @@ struct dm_block_manager 
*dm_block_manager_create(struct block_device *bdev, } EXPORT_SYMBOL_GPL(dm_block_manager_create); +void dm_setup_bm_cache(struct dm_block_manager *bm, unsigned long block) +{ + dm_setup_buffer_cache(bm->bufio, block); +} +EXPORT_SYMBOL_GPL(dm_setup_bm_cache); + +void dm_move_bm_cache(struct dm_block_manager *bm, unsigned long block) +{ + dm_move_cache(bm->bufio, block); +} + void dm_block_manager_destroy(struct dm_block_manager *bm) { dm_bufio_client_destroy(bm->bufio); diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index f706d3de8..25121bbc5 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -35,7 +35,9 @@ struct dm_block_manager; struct dm_block_manager *dm_block_manager_create( struct block_device *bdev, unsigned int block_size, unsigned int max_held_per_thread); +void dm_move_bm_cache(struct dm_block_manager *bm, unsigned long block); void dm_block_manager_destroy(struct dm_block_manager *bm); +void dm_setup_bm_cache(struct dm_block_manager *bm, unsigned long block); void dm_block_manager_reset(struct dm_block_manager *bm); unsigned int dm_bm_block_size(struct dm_block_manager *bm); diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb..d7d7a33cc 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -71,6 +71,8 @@ void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void dm_setup_buffer_cache(struct dm_bufio_client *bc, unsigned long block); + /* * Like dm_bufio_read, but don't read anything from the disk. It is * expected that the caller initializes the buffer and marks it dirty. @@ -139,6 +141,8 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c */ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block); +void dm_move_cache(struct dm_bufio_client *bc, unsigned long block); + /* * Free the given range of buffers. * This is just a hint, if the buffer is in use or dirty, this function -- 2.43.0
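
The heart of the split is the index calculation: the patch grows dm_num_hash_locks() by 32 slots and adds dm_hash_locks_index_special(), which steers the configured "special" block (the pool's first-level btree root, wired up through dm_setup_bm_cache()/dm_move_bm_cache()) into that reserved range, while every other block hashes into the remaining slots. Below is a minimal userspace sketch of just that indexing, with the constants copied from the patch; the surrounding dm-bufio structures and locking are omitted, so treat it as an illustration rather than the driver code itself.

#include <stdint.h>
#include <stdio.h>

/* Constants as they appear in the patch (drivers/md/dm.h). */
#define DM_HASH_LOCKS_MULT  4294967291ULL
#define DM_HASH_LOCKS_SHIFT 6
#define RESERVED_SLOTS      32   /* extra slots added by dm_num_hash_locks() */

typedef uint64_t sector_t;

/*
 * Mirror of dm_hash_locks_index_special(): the "special" block (the
 * first-level btree root) lands in one of the reserved trailing slots;
 * all other blocks hash into the leading (num_locks - 32) slots.
 */
static unsigned int hash_locks_index_special(sector_t block,
					     unsigned int num_locks,
					     sector_t special)
{
	sector_t h1 = (block * DM_HASH_LOCKS_MULT) >> DM_HASH_LOCKS_SHIFT;
	sector_t h2 = h1 >> DM_HASH_LOCKS_SHIFT;

	if (block == special)
		return ((h1 ^ h2) & (RESERVED_SLOTS - 1)) + num_locks - RESERVED_SLOTS;

	return (h1 ^ h2) & (num_locks - RESERVED_SLOTS - 1);
}

int main(void)
{
	unsigned int num_locks = 64 + RESERVED_SLOTS; /* e.g. 64 ordinary slots + 32 reserved */
	sector_t special = 1234;                      /* pretend first-level btree root */

	printf("special block  -> slot %u\n",
	       hash_locks_index_special(special, num_locks, special));
	printf("ordinary block -> slot %u\n",
	       hash_locks_index_special(42, num_locks, special));
	return 0;
}

Keeping the root block in a reserved slot range means first-level lookups never share an rb-tree with the much larger population of second-level buffers, which is the lookup speed-up the changelog is after.
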
[PATCH OLK-6.6] blk-cgroup: fix possible deadlock while configuring policy
by Zheng Qixing 26 Dec '25

From: Yu Kuai <yukuai3(a)huawei.com> stable inclusion from stable-v6.6.117 commit e1729523759cda2c0afb76b1c88e0d2f2ef5b7cb category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/11601 CVE: CVE-2025-68178 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id… ------------------ [ Upstream commit 5d726c4dbeeddef612e6bed27edd29733f4d13af ] Following deadlock can be triggered easily by lockdep: WARNING: possible circular locking dependency detected 6.17.0-rc3-00124-ga12c2658ced0 #1665 Not tainted ------------------------------------------------------ check/1334 is trying to acquire lock: ff1100011d9d0678 (&q->sysfs_lock){+.+.}-{4:4}, at: blk_unregister_queue+0x53/0x180 but task is already holding lock: ff1100011d9d00e0 (&q->q_usage_counter(queue)#3){++++}-{0:0}, at: del_gendisk+0xba/0x110 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&q->q_usage_counter(queue)#3){++++}-{0:0}: blk_queue_enter+0x40b/0x470 blkg_conf_prep+0x7b/0x3c0 tg_set_limit+0x10a/0x3e0 cgroup_file_write+0xc6/0x420 kernfs_fop_write_iter+0x189/0x280 vfs_write+0x256/0x490 ksys_write+0x83/0x190 __x64_sys_write+0x21/0x30 x64_sys_call+0x4608/0x4630 do_syscall_64+0xdb/0x6b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&q->rq_qos_mutex){+.+.}-{4:4}: __mutex_lock+0xd8/0xf50 mutex_lock_nested+0x2b/0x40 wbt_init+0x17e/0x280 wbt_enable_default+0xe9/0x140 blk_register_queue+0x1da/0x2e0 __add_disk+0x38c/0x5d0 add_disk_fwnode+0x89/0x250 device_add_disk+0x18/0x30 virtblk_probe+0x13a3/0x1800 virtio_dev_probe+0x389/0x610 really_probe+0x136/0x620 __driver_probe_device+0xb3/0x230 driver_probe_device+0x2f/0xe0 __driver_attach+0x158/0x250 bus_for_each_dev+0xa9/0x130 driver_attach+0x26/0x40 bus_add_driver+0x178/0x3d0 driver_register+0x7d/0x1c0 __register_virtio_driver+0x2c/0x60 virtio_blk_init+0x6f/0xe0 do_one_initcall+0x94/0x540 kernel_init_freeable+0x56a/0x7b0 kernel_init+0x2b/0x270 ret_from_fork+0x268/0x4c0 ret_from_fork_asm+0x1a/0x30 -> #0 (&q->sysfs_lock){+.+.}-{4:4}: __lock_acquire+0x1835/0x2940 lock_acquire+0xf9/0x450 __mutex_lock+0xd8/0xf50 mutex_lock_nested+0x2b/0x40 blk_unregister_queue+0x53/0x180 __del_gendisk+0x226/0x690 del_gendisk+0xba/0x110 sd_remove+0x49/0xb0 [sd_mod] device_remove+0x87/0xb0 device_release_driver_internal+0x11e/0x230 device_release_driver+0x1a/0x30 bus_remove_device+0x14d/0x220 device_del+0x1e1/0x5a0 __scsi_remove_device+0x1ff/0x2f0 scsi_remove_device+0x37/0x60 sdev_store_delete+0x77/0x100 dev_attr_store+0x1f/0x40 sysfs_kf_write+0x65/0x90 kernfs_fop_write_iter+0x189/0x280 vfs_write+0x256/0x490 ksys_write+0x83/0x190 __x64_sys_write+0x21/0x30 x64_sys_call+0x4608/0x4630 do_syscall_64+0xdb/0x6b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: &q->sysfs_lock --> &q->rq_qos_mutex --> &q->q_usage_counter(queue)#3 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&q->q_usage_counter(queue)#3); lock(&q->rq_qos_mutex); lock(&q->q_usage_counter(queue)#3); lock(&q->sysfs_lock); Root cause is that queue_usage_counter is grabbed with rq_qos_mutex held in blkg_conf_prep(), while queue should be freezed before rq_qos_mutex from other context. The blk_queue_enter() from blkg_conf_prep() is used to protect against policy deactivation, which is already protected with blkcg_mutex, hence convert blk_queue_enter() to blkcg_mutex to fix this problem. 
Meanwhile, consider that blkcg_mutex is held after queue is freezed from policy deactivation, also convert blkg_alloc() to use GFP_NOIO. Signed-off-by: Yu Kuai <yukuai3(a)huawei.com> Reviewed-by: Ming Lei <ming.lei(a)redhat.com> Signed-off-by: Jens Axboe <axboe(a)kernel.dk> Signed-off-by: Sasha Levin <sashal(a)kernel.org> Signed-off-by: Zheng Qixing <zhengqixing(a)huawei.com> --- block/blk-cgroup.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0a56cfac5a5d..eadf78bdcc40 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -857,14 +857,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, disk = ctx->bdev->bd_disk; q = disk->queue; - /* - * blkcg_deactivate_policy() requires queue to be frozen, we can grab - * q_usage_counter to prevent concurrent with blkcg_deactivate_policy(). - */ - ret = blk_queue_enter(q, 0); - if (ret) - goto fail; - + /* Prevent concurrent with blkcg_deactivate_policy() */ + mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) { @@ -894,16 +888,16 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, /* Drop locks to do new blkg allocation with GFP_KERNEL. */ spin_unlock_irq(&q->queue_lock); - new_blkg = blkg_alloc(pos, disk, GFP_KERNEL); + new_blkg = blkg_alloc(pos, disk, GFP_NOIO); if (unlikely(!new_blkg)) { ret = -ENOMEM; - goto fail_exit_queue; + goto fail_exit; } if (radix_tree_preload(GFP_KERNEL)) { blkg_free(new_blkg); ret = -ENOMEM; - goto fail_exit_queue; + goto fail_exit; } spin_lock_irq(&q->queue_lock); @@ -931,7 +925,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto success; } success: - blk_queue_exit(q); + mutex_unlock(&q->blkcg_mutex); ctx->blkg = blkg; return 0; @@ -939,9 +933,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, radix_tree_preload_end(); fail_unlock: spin_unlock_irq(&q->queue_lock); -fail_exit_queue: - blk_queue_exit(q); -fail: +fail_exit: + mutex_unlock(&q->blkcg_mutex); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue -- 2.39.2
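
The lockdep splat above is the standard circular-dependency pattern: different paths acquire the same set of locks in inconsistent orders, so each can end up waiting on a lock the other already holds, and the patch resolves it by making blkg_conf_prep() serialize against policy deactivation with blkcg_mutex instead of freezing the queue. As a purely illustrative aside (generic pthread code, not kernel code), the minimal shape of such an inversion looks like this; running it will usually hang, which is exactly the hazard lockdep is warning about:

/*
 * Minimal illustration of an ABBA lock-order inversion: two code paths
 * taking the same two locks in opposite order. Build with -pthread.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* stand-in for one lock in the chain */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* stand-in for the other */

static void *config_path(void *arg)   /* think: cgroup config writer */
{
	(void)arg;
	pthread_mutex_lock(&lock_b);      /* takes B first ... */
	usleep(100 * 1000);
	pthread_mutex_lock(&lock_a);      /* ... then wants A */
	puts("config path got both locks");
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

static void *teardown_path(void *arg) /* think: disk removal path */
{
	(void)arg;
	pthread_mutex_lock(&lock_a);      /* takes A first ... */
	usleep(100 * 1000);
	pthread_mutex_lock(&lock_b);      /* ... then wants B: deadlock */
	puts("teardown path got both locks");
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, config_path, NULL);
	pthread_create(&t2, NULL, teardown_path, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}
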
[PATCH OLK-5.10 v2] md/raid0, raid10: Don't set discard sectors for request queue
by Zheng Qixing 26 Dec '25

From: Xiao Ni <xni(a)redhat.com>

mainline inclusion
from mainline-v6.2-rc1
commit 8e1a2279ca2b0485cc379a153d02a9793f74a48f
category: bugfix
bugzilla: https://atomgit.com/src-openeuler/kernel/issues/11303
CVE: CVE-2022-50583
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…

------------------

It should use disk_stack_limits to get a proper max_discard_sectors
rather than setting a value by stack drivers.

And there is a bug. If all member disks are rotational devices,
raid0/raid10 set max_discard_sectors. So the member devices are not
ssd/nvme, but raid0/raid10 export the wrong value. It reports warning
messages in function __blkdev_issue_discard when mkfs.xfs like this:

[ 4616.022599] ------------[ cut here ]------------
[ 4616.027779] WARNING: CPU: 4 PID: 99634 at block/blk-lib.c:50 __blkdev_issue_discard+0x16a/0x1a0
[ 4616.140663] RIP: 0010:__blkdev_issue_discard+0x16a/0x1a0
[ 4616.146601] Code: 24 4c 89 20 31 c0 e9 fe fe ff ff c1 e8 09 8d 48 ff 4c 89 f0 4c 09 e8 48 85 c1 0f 84 55 ff ff ff b8 ea ff ff ff e9 df fe ff ff <0f> 0b 48 8d 74 24 08 e8 ea d6 00 00 48 c7 c6 20 1e 89 ab 48 c7 c7
[ 4616.167567] RSP: 0018:ffffaab88cbffca8 EFLAGS: 00010246
[ 4616.173406] RAX: ffff9ba1f9e44678 RBX: 0000000000000000 RCX: ffff9ba1c9792080
[ 4616.181376] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9ba1c9792080
[ 4616.189345] RBP: 0000000000000cc0 R08: ffffaab88cbffd10 R09: 0000000000000000
[ 4616.197317] R10: 0000000000000012 R11: 0000000000000000 R12: 0000000000000000
[ 4616.205288] R13: 0000000000400000 R14: 0000000000000cc0 R15: ffff9ba1c9792080
[ 4616.213259] FS:  00007f9a5534e980(0000) GS:ffff9ba1b7c80000(0000) knlGS:0000000000000000
[ 4616.222298] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4616.228719] CR2: 000055a390a4c518 CR3: 0000000123e40006 CR4: 00000000001706e0
[ 4616.236689] Call Trace:
[ 4616.239428]  blkdev_issue_discard+0x52/0xb0
[ 4616.244108]  blkdev_common_ioctl+0x43c/0xa00
[ 4616.248883]  blkdev_ioctl+0x116/0x280
[ 4616.252977]  __x64_sys_ioctl+0x8a/0xc0
[ 4616.257163]  do_syscall_64+0x5c/0x90
[ 4616.261164]  ? handle_mm_fault+0xc5/0x2a0
[ 4616.265652]  ? do_user_addr_fault+0x1d8/0x690
[ 4616.270527]  ? do_syscall_64+0x69/0x90
[ 4616.274717]  ? exc_page_fault+0x62/0x150
[ 4616.279097]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 4616.284748] RIP: 0033:0x7f9a55398c6b

Signed-off-by: Xiao Ni <xni(a)redhat.com>
Reported-by: Yi Zhang <yi.zhang(a)redhat.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Signed-off-by: Song Liu <song(a)kernel.org>

Conflicts:
	drivers/md/raid0.c
	[Context conflicts.]
	drivers/md/raid10.c
	[Due to commit bcc90d280465e ("md/raid10: improve raid10 discard
	request") has been reverted.]

Signed-off-by: Zheng Qixing <zhengqixing(a)huawei.com>
---
 drivers/md/raid0.c  | 1 -
 drivers/md/raid10.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 8f71ae30a3b3..f812061cb668 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
 
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 		blk_queue_io_opt(mddev->queue,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index bd99296c7b6a..f272021d4e8b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3778,8 +3778,6 @@ static int raid10_run(struct mddev *mddev)
 	}
 
 	if (mddev->queue) {
-		blk_queue_max_discard_sectors(mddev->queue,
-					      mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
--
2.39.2
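
As the changelog says, the stacking driver should let the member devices' limits propagate up instead of inventing a discard limit of its own. For orientation only, the 5.10-era pattern for that is a disk_stack_limits() call per member device; the sketch below is modelled on the md setup loops of that kernel generation and is an assumption about the surrounding code, not part of this patch:

#include <linux/blkdev.h>
#include "md.h"

/*
 * Sketch: inherit queue limits (including discard) from the member
 * devices via disk_stack_limits() instead of hard-coding a value with
 * blk_queue_max_discard_sectors(). Illustrative only.
 */
static void stack_member_limits(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);
	}
}
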
[openeuler:OLK-6.6 13/13] drivers/ub/ubus/cap.c:12:6: warning: no previous prototype for 'ub_set_cap_bitmap'
by kernel test robot 26 Dec '25

tree: https://gitee.com/openeuler/kernel.git OLK-6.6 head: 03c9c649e6f28e26260d276ce755f785b2435da3 commit: c21b483526d766fd97d491f3951433f69fde3ac3 [13/13] ub:ubus: Add UBUS capability interfaces config: arm64-randconfig-004-20251226 (https://download.01.org/0day-ci/archive/20251226/202512261449.EGuzfA3r-lkp@…) compiler: aarch64-linux-gcc (GCC) 13.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251226/202512261449.EGuzfA3r-lkp@…) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp(a)intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202512261449.EGuzfA3r-lkp@intel.com/ All warnings (new ones prefixed by >>): >> drivers/ub/ubus/cap.c:12:6: warning: no previous prototype for 'ub_set_cap_bitmap' [-Wmissing-prototypes] 12 | void ub_set_cap_bitmap(struct ub_entity *uent) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:47:5: warning: no previous prototype for 'ub_find_capability' [-Wmissing-prototypes] 47 | u32 ub_find_capability(u32 cap) | ^~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:52:5: warning: no previous prototype for 'ub_cap_read_byte' [-Wmissing-prototypes] 52 | int ub_cap_read_byte(struct ub_entity *uent, u32 cap, u32 off, u8 *val) | ^~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:70:5: warning: no previous prototype for 'ub_cap_read_word' [-Wmissing-prototypes] 70 | int ub_cap_read_word(struct ub_entity *uent, u32 cap, u32 off, u16 *val) | ^~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:88:5: warning: no previous prototype for 'ub_cap_read_dword' [-Wmissing-prototypes] 88 | int ub_cap_read_dword(struct ub_entity *uent, u32 cap, u32 off, u32 *val) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:106:5: warning: no previous prototype for 'ub_cap_write_byte' [-Wmissing-prototypes] 106 | int ub_cap_write_byte(struct ub_entity *uent, u32 cap, u32 off, u8 val) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:117:5: warning: no previous prototype for 'ub_cap_write_word' [-Wmissing-prototypes] 117 | int ub_cap_write_word(struct ub_entity *uent, u32 cap, u32 off, u16 val) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:128:5: warning: no previous prototype for 'ub_cap_write_dword' [-Wmissing-prototypes] 128 | int ub_cap_write_dword(struct ub_entity *uent, u32 cap, u32 off, u32 val) | ^~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:139:5: warning: no previous prototype for 'ub_cap_clear_and_set_word' [-Wmissing-prototypes] 139 | int ub_cap_clear_and_set_word(struct ub_entity *dev, u32 cap, u32 off, | ^~~~~~~~~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:155:5: warning: no previous prototype for 'ub_cap_clear_and_set_dword' [-Wmissing-prototypes] 155 | int ub_cap_clear_and_set_dword(struct ub_entity *dev, u32 cap, u32 off, | ^~~~~~~~~~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:224:6: warning: no previous prototype for 'ub_init_capabilities' [-Wmissing-prototypes] 224 | void ub_init_capabilities(struct ub_entity *uent) | ^~~~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/cap.c:238:6: warning: no previous prototype for 'ub_uninit_capabilities' [-Wmissing-prototypes] 238 | void ub_uninit_capabilities(struct ub_entity *uent) | ^~~~~~~~~~~~~~~~~~~~~~ Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for RESCTRL_FS Depends on [n]: MISC_FILESYSTEMS [=n] && ARCH_HAS_CPU_RESCTRL [=y] Selected by [y]: - ARM64_MPAM [=y] vim +/ub_set_cap_bitmap +12 drivers/ub/ubus/cap.c 11 > 12 void ub_set_cap_bitmap(struct ub_entity *uent) 13 { 14 int ret; 15 u32 i; 16 17 for (i = 
0; i < SZ_8; i++) { 18 ret = ub_cfg_read_dword(uent, UB_CFG1_CAP_BITMAP + (i << SZ_2), 19 &uent->cfg1_bitmap[i]); 20 if (ret) 21 ub_err(uent, "Read cfg1 cap bitmap failed, ret=%d\n", 22 ret); 23 } 24 25 if (is_p_device(uent)) 26 return; 27 28 for (i = 0; i < SZ_8; i++) { 29 ret = ub_cfg_read_dword(uent, UB_CFG0_CAP_BITMAP + (i << SZ_2), 30 &uent->cfg0_bitmap[i]); 31 if (ret) 32 ub_err(uent, "Read cfg0 cap bitmap failed, ret=%d\n", 33 ret); 34 } 35 } 36 37 /* Check whether the capbility register is implemented. */ 38 static bool ub_cap_reg_implemented(struct ub_entity *uent, u32 cap) 39 { 40 u32 i = (cap & 0xFF) / SZ_32; 41 u32 val = (cap >> SZ_8) ? uent->cfg1_bitmap[i] : uent->cfg0_bitmap[i]; 42 43 return val & BIT((cap & 0xFF) % SZ_32); 44 } 45 46 /* find the start address of capability */ > 47 u32 ub_find_capability(u32 cap) 48 { 49 return (cap << BITS_PER_BYTE) << SZ_2; 50 } 51 > 52 int ub_cap_read_byte(struct ub_entity *uent, u32 cap, u32 off, u8 *val) 53 { 54 int ret; 55 56 *val = 0; 57 if (off >= SZ_1K) 58 return -EFAULT; 59 60 if (!ub_cap_reg_implemented(uent, cap)) 61 return -ENXIO; 62 63 ret = ub_cfg_read_byte(uent, ub_find_capability(cap) + off, val); 64 if (ret) 65 *val = 0; 66 67 return ret; 68 } 69 > 70 int ub_cap_read_word(struct ub_entity *uent, u32 cap, u32 off, u16 *val) 71 { 72 int ret; 73 74 *val = 0; 75 if (off & 1 || off >= SZ_1K) 76 return -EFAULT; 77 78 if (!ub_cap_reg_implemented(uent, cap)) 79 return -ENXIO; 80 81 ret = ub_cfg_read_word(uent, ub_find_capability(cap) + off, val); 82 if (ret) 83 *val = 0; 84 85 return ret; 86 } 87 > 88 int ub_cap_read_dword(struct ub_entity *uent, u32 cap, u32 off, u32 *val) 89 { 90 int ret; 91 92 *val = 0; 93 if (off & DW_CHECK || off >= SZ_1K) 94 return -EFAULT; 95 96 if (!ub_cap_reg_implemented(uent, cap)) 97 return -ENXIO; 98 99 ret = ub_cfg_read_dword(uent, ub_find_capability(cap) + off, val); 100 if (ret) 101 *val = 0; 102 103 return ret; 104 } 105 > 106 int ub_cap_write_byte(struct ub_entity *uent, u32 cap, u32 off, u8 val) 107 { 108 if (off >= SZ_1K) 109 return -EFAULT; 110 111 if (!ub_cap_reg_implemented(uent, cap)) 112 return -ENXIO; 113 114 return ub_cfg_write_byte(uent, ub_find_capability(cap) + off, val); 115 } 116 > 117 int ub_cap_write_word(struct ub_entity *uent, u32 cap, u32 off, u16 val) 118 { 119 if (off & 1 || off >= SZ_1K) 120 return -EFAULT; 121 122 if (!ub_cap_reg_implemented(uent, cap)) 123 return -ENXIO; 124 125 return ub_cfg_write_word(uent, ub_find_capability(cap) + off, val); 126 } 127 > 128 int ub_cap_write_dword(struct ub_entity *uent, u32 cap, u32 off, u32 val) 129 { 130 if (off & DW_CHECK || off >= SZ_1K) 131 return -EFAULT; 132 133 if (!ub_cap_reg_implemented(uent, cap)) 134 return -ENXIO; 135 136 return ub_cfg_write_dword(uent, ub_find_capability(cap) + off, val); 137 } 138 > 139 int ub_cap_clear_and_set_word(struct ub_entity *dev, u32 cap, u32 off, 140 u16 clear, u16 set) 141 { 142 u16 val; 143 int ret; 144 145 ret = ub_cap_read_word(dev, cap, off, &val); 146 if (!ret) { 147 val &= ~clear; 148 val |= set; 149 ret = ub_cap_write_word(dev, cap, off, val); 150 } 151 152 return ret; 153 } 154 > 155 int ub_cap_clear_and_set_dword(struct ub_entity *dev, u32 cap, u32 off, 156 u32 clear, u32 set) 157 { 158 u32 val; 159 int ret; 160 161 ret = ub_cap_read_dword(dev, cap, off, &val); 162 if (!ret) { 163 val &= ~clear; 164 val |= set; 165 ret = ub_cap_write_dword(dev, cap, off, val); 166 } 167 168 return ret; 169 } 170 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
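
All of these warnings are the same issue: the functions have external linkage but no prototype is visible where they are defined, which W=1 builds flag via -Wmissing-prototypes. The usual fixes are to declare the exported ones in a header that cap.c includes, or to give file-local helpers internal linkage. A sketch of both options follows; the header name is a placeholder and the choice of which functions stay exported is an assumption, not what the ubus authors will necessarily pick:

/* Option 1: declare the interface in a header that cap.c includes
 * (the name drivers/ub/ubus/ubus.h is assumed here): */
void ub_set_cap_bitmap(struct ub_entity *uent);
u32 ub_find_capability(u32 cap);
int ub_cap_read_byte(struct ub_entity *uent, u32 cap, u32 off, u8 *val);
int ub_cap_write_byte(struct ub_entity *uent, u32 cap, u32 off, u8 val);
/* ...one declaration per function named in the warning list... */

/* Option 2: if a helper is only used inside cap.c, give it internal
 * linkage instead, which also silences the warning: */
static u32 ub_find_capability(u32 cap)
{
	return (cap << BITS_PER_BYTE) << SZ_2;
}
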
[openeuler:OLK-6.6 13/13] drivers/ub/ubus/port.c:31:5: warning: no previous prototype for 'ub_port_read_byte'
by kernel test robot 26 Dec '25

tree: https://gitee.com/openeuler/kernel.git OLK-6.6 head: 52a3b0aa13aa2325ca2caf528fbcaba06ad3bab5 commit: 210be966e79360fac509e7d66f9adb1712146c14 [13/13] ub:ubus: Support for UB port sysfs attribute files config: arm64-randconfig-004-20251226 (https://download.01.org/0day-ci/archive/20251226/202512261132.sMR3WqA2-lkp@…) compiler: aarch64-linux-gcc (GCC) 13.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251226/202512261132.sMR3WqA2-lkp@…) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp(a)intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202512261132.sMR3WqA2-lkp@intel.com/ All warnings (new ones prefixed by >>): >> drivers/ub/ubus/port.c:31:5: warning: no previous prototype for 'ub_port_read_byte' [-Wmissing-prototypes] 31 | int ub_port_read_byte(struct ub_port *port, u32 pos, u8 *val) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/port.c:38:5: warning: no previous prototype for 'ub_port_read_word' [-Wmissing-prototypes] 38 | int ub_port_read_word(struct ub_port *port, u32 pos, u16 *val) | ^~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/port.c:45:5: warning: no previous prototype for 'ub_port_read_dword' [-Wmissing-prototypes] 45 | int ub_port_read_dword(struct ub_port *port, u32 pos, u32 *val) | ^~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/port.c:52:5: warning: no previous prototype for 'ub_port_write_byte' [-Wmissing-prototypes] 52 | int ub_port_write_byte(struct ub_port *port, u32 pos, u8 val) | ^~~~~~~~~~~~~~~~~~ >> drivers/ub/ubus/port.c:59:5: warning: no previous prototype for 'ub_port_write_word' [-Wmissing-prototypes] 59 | int ub_port_write_word(struct ub_port *port, u32 pos, u16 val) | ^~~~~~~~~~~~~~~~~~ drivers/ub/ubus/port.c:66:5: warning: no previous prototype for 'ub_port_write_dword' [-Wmissing-prototypes] 66 | int ub_port_write_dword(struct ub_port *port, u32 pos, u32 val) | ^~~~~~~~~~~~~~~~~~~ Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for RESCTRL_FS Depends on [n]: MISC_FILESYSTEMS [=n] && ARCH_HAS_CPU_RESCTRL [=y] Selected by [y]: - ARM64_MPAM [=y] vim +/ub_port_read_byte +31 drivers/ub/ubus/port.c 21 22 #define UB_PORT_ATTR_RO(field) \ 23 static struct ub_port_attribute ub_port_attr_##field = __ATTR_RO(field) 24 25 #define UB_PORT_ATTR_RW(field) \ 26 static struct ub_port_attribute ub_port_attr_##field = __ATTR_RW(field) 27 28 #define UB_PORT_ATTR_WO(field) \ 29 static struct ub_port_attribute ub_port_attr_##field = __ATTR_WO(field) 30 > 31 int ub_port_read_byte(struct ub_port *port, u32 pos, u8 *val) 32 { 33 u64 base = UB_PORT_SLICE_START + port->index * UB_PORT_SLICE_SIZE; 34 35 return ub_cfg_read_byte(port->uent, base + pos, val); 36 } 37 > 38 int ub_port_read_word(struct ub_port *port, u32 pos, u16 *val) 39 { 40 u64 base = UB_PORT_SLICE_START + port->index * UB_PORT_SLICE_SIZE; 41 42 return ub_cfg_read_word(port->uent, base + pos, val); 43 } 44 > 45 int ub_port_read_dword(struct ub_port *port, u32 pos, u32 *val) 46 { 47 u64 base = UB_PORT_SLICE_START + port->index * UB_PORT_SLICE_SIZE; 48 49 return ub_cfg_read_dword(port->uent, base + pos, val); 50 } 51 > 52 int ub_port_write_byte(struct ub_port *port, u32 pos, u8 val) 53 { 54 u64 base = UB_PORT_SLICE_START + port->index * UB_PORT_SLICE_SIZE; 55 56 return ub_cfg_write_byte(port->uent, base + pos, val); 57 } 58 > 59 int ub_port_write_word(struct ub_port *port, u32 pos, u16 val) 60 { 61 u64 base = UB_PORT_SLICE_START + port->index * 
UB_PORT_SLICE_SIZE; 62 63 return ub_cfg_write_word(port->uent, base + pos, val); 64 } 65 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
[PATCH OLK-5.10] ext4: xattr: fix null pointer deref in ext4_raw_inode()
by Yongjian Sun 26 Dec '25

From: Karina Yankevich <k.yankevich(a)omp.ru>

mainline inclusion
from mainline-v6.19-rc1
commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14
category: bugfix
bugzilla: https://atomgit.com/src-openeuler/kernel/issues/13034
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…

--------------------------------

If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED), iloc.bh
will remain set to NULL. Since ext4_xattr_inode_dec_ref_all() lacks error
checking, this will lead to a null pointer dereference in
ext4_raw_inode(), called right after ext4_get_inode_loc().

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: c8e008b60492 ("ext4: ignore xattrs past end")
Cc: stable(a)kernel.org
Signed-off-by: Karina Yankevich <k.yankevich(a)omp.ru>
Reviewed-by: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Message-ID: <20251022093253.3546296-1-k.yankevich(a)omp.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Yongjian Sun <sunyongjian1(a)huawei.com>
---
 fs/ext4/xattr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3e5efa68698e..fd44b8c0c4af 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1133,7 +1133,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
 	if (block_csum)
 		end = (void *)bh->b_data + bh->b_size;
 	else {
-		ext4_get_inode_loc(parent, &iloc);
+		err = ext4_get_inode_loc(parent, &iloc);
+		if (err) {
+			EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err);
+			return;
+		}
 		end = (void *)ext4_raw_inode(&iloc) +
 			EXT4_SB(parent->i_sb)->s_inode_size;
 	}
--
2.39.2
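
The bug class here is an ignored return value: when ext4_get_inode_loc() fails, iloc.bh stays NULL and ext4_raw_inode() immediately computes an address from bh->b_data. The stand-alone sketch below reproduces that shape and the guard the patch adds; the structures are simplified stand-ins, not the real ext4 types.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for buffer_head / ext4_iloc; not the real ext4 definitions. */
struct buffer_head { char *b_data; };
struct iloc { struct buffer_head *bh; unsigned long offset; };

/* Pretend lookup that can fail and leave iloc->bh untouched (NULL). */
static int get_inode_loc(struct iloc *iloc, int simulate_error)
{
	if (simulate_error)
		return -117;                      /* think -EFSCORRUPTED */
	iloc->bh = malloc(sizeof(*iloc->bh));
	iloc->bh->b_data = calloc(1, 4096);
	iloc->offset = 128;
	return 0;
}

/* Mirrors the shape of ext4_raw_inode(): dereferences iloc->bh blindly. */
static void *raw_inode(struct iloc *iloc)
{
	return iloc->bh->b_data + iloc->offset;   /* NULL deref if bh == NULL */
}

int main(void)
{
	struct iloc iloc = { 0 };
	int err = get_inode_loc(&iloc, 1);        /* simulated failure */

	if (err) {                                /* the guard the patch adds */
		fprintf(stderr, "inode loc failed (error %d)\n", err);
		return 1;
	}
	printf("raw inode at %p\n", raw_inode(&iloc));
	return 0;
}
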
[PATCH OLK-6.6] ext4: xattr: fix null pointer deref in ext4_raw_inode()
by Yongjian Sun 26 Dec '25

From: Karina Yankevich <k.yankevich(a)omp.ru>

mainline inclusion
from mainline-v6.19-rc1
commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14
category: bugfix
bugzilla: https://atomgit.com/src-openeuler/kernel/issues/13034
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…

--------------------------------

If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED), iloc.bh
will remain set to NULL. Since ext4_xattr_inode_dec_ref_all() lacks error
checking, this will lead to a null pointer dereference in
ext4_raw_inode(), called right after ext4_get_inode_loc().

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: c8e008b60492 ("ext4: ignore xattrs past end")
Cc: stable(a)kernel.org
Signed-off-by: Karina Yankevich <k.yankevich(a)omp.ru>
Reviewed-by: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Message-ID: <20251022093253.3546296-1-k.yankevich(a)omp.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Yongjian Sun <sunyongjian1(a)huawei.com>
---
 fs/ext4/xattr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6ef8820625b9..5009fdcfe150 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1184,7 +1184,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
 	if (block_csum)
 		end = (void *)bh->b_data + bh->b_size;
 	else {
-		ext4_get_inode_loc(parent, &iloc);
+		err = ext4_get_inode_loc(parent, &iloc);
+		if (err) {
+			EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err);
+			return;
+		}
 		end = (void *)ext4_raw_inode(&iloc) +
 			EXT4_SB(parent->i_sb)->s_inode_size;
 	}
--
2.39.2
[PATCH OLK-6.6] jbd2: avoid bug_on in jbd2_journal_get_create_access() when file system corrupted
by Yongjian Sun 26 Dec '25

From: Ye Bin <yebin10(a)huawei.com>

mainline inclusion
from mainline-v6.19-rc1
commit 986835bf4d11032bba4ab8414d18fce038c61bb4
category: bugfix
bugzilla: https://atomgit.com/src-openeuler/kernel/issues/12698
CVE: CVE-2025-68337
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…

--------------------------------

There's issue when file system corrupted:
------------[ cut here ]------------
kernel BUG at fs/jbd2/transaction.c:1289!
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 5 UID: 0 PID: 2031 Comm: mkdir Not tainted 6.18.0-rc1-next
RIP: 0010:jbd2_journal_get_create_access+0x3b6/0x4d0
RSP: 0018:ffff888117aafa30 EFLAGS: 00010202
RAX: 0000000000000000 RBX: ffff88811a86b000 RCX: ffffffff89a63534
RDX: 1ffff110200ec602 RSI: 0000000000000004 RDI: ffff888100763010
RBP: ffff888100763000 R08: 0000000000000001 R09: ffff888100763028
R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000000
R13: ffff88812c432000 R14: ffff88812c608000 R15: ffff888120bfc000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f91d6970c99 CR3: 00000001159c4000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 __ext4_journal_get_create_access+0x42/0x170
 ext4_getblk+0x319/0x6f0
 ext4_bread+0x11/0x100
 ext4_append+0x1e6/0x4a0
 ext4_init_new_dir+0x145/0x1d0
 ext4_mkdir+0x326/0x920
 vfs_mkdir+0x45c/0x740
 do_mkdirat+0x234/0x2f0
 __x64_sys_mkdir+0xd6/0x120
 do_syscall_64+0x5f/0xfa0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

The above issue occurs with us in errors=continue mode when accompanied
by storage failures. There have been many inconsistencies in the file
system data. In the case of file system data inconsistency, for example,
if the block bitmap of a referenced block is not set, it can lead to the
situation where a block being committed is allocated and used again. As
a result, the following condition will not be satisfied then trigger
BUG_ON. Of course, it is entirely possible to construct a problematic
image that can trigger this BUG_ON through specific operations. In fact,
I have constructed such an image and easily reproduced this issue.
Therefore, J_ASSERT() holds true only under ideal conditions, but it may
not necessarily be satisfied in exceptional scenarios. Using J_ASSERT()
directly in abnormal situations would cause the system to crash, which
is clearly not what we want. So here we directly trigger a JBD abort
instead of immediately invoking BUG_ON.

Fixes: 470decc613ab ("[PATCH] jbd2: initial copy of files from jbd")
Signed-off-by: Ye Bin <yebin10(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251025072657.307851-1-yebin(a)huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
Signed-off-by: Yongjian Sun <sunyongjian1(a)huawei.com>
---
 fs/jbd2/transaction.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c2b8ad0b24c4..970bebe1dadb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1274,14 +1274,23 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	 * committing transaction's lists, but it HAS to be in Forget state in
 	 * that case: the transaction must have deleted the buffer for it to be
 	 * reused here.
+	 * In the case of file system data inconsistency, for example, if the
+	 * block bitmap of a referenced block is not set, it can lead to the
+	 * situation where a block being committed is allocated and used again.
+	 * As a result, the following condition will not be satisfied, so here
+	 * we directly trigger a JBD abort instead of immediately invoking
+	 * bugon.
 	 */
 	spin_lock(&jh->b_state_lock);
-	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
-		jh->b_transaction == NULL ||
-		(jh->b_transaction == journal->j_committing_transaction &&
-		 jh->b_jlist == BJ_Forget)));
+	if (!(jh->b_transaction == transaction || jh->b_transaction == NULL ||
+	      (jh->b_transaction == journal->j_committing_transaction &&
+	       jh->b_jlist == BJ_Forget)) || jh->b_next_transaction != NULL) {
+		err = -EROFS;
+		spin_unlock(&jh->b_state_lock);
+		jbd2_journal_abort(journal, err);
+		goto out;
+	}
 
-	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
--
2.39.2
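
The shape of the fix generalizes: when an invariant can be violated by on-disk corruption rather than by a kernel bug, validating it and aborting the journal beats crashing the machine with BUG_ON(). A small self-contained sketch of that "validate, abort, return an error" pattern is below; the journal type and abort hook are placeholders, not the jbd2 API.

#include <stdbool.h>
#include <stdio.h>

#define EROFS 30

/* Placeholder journal with an "aborted" flag; not the real jbd2 journal_t. */
struct journal { bool aborted; };

static void journal_abort(struct journal *j, int err)
{
	j->aborted = true;
	fprintf(stderr, "journal aborted, err=%d\n", err);
}

/*
 * Old style: assert the invariant and crash if corruption broke it.
 * New style: detect the broken invariant, abort the journal, and return
 * an error the caller can handle.
 */
static int get_create_access(struct journal *j, bool invariant_holds)
{
	if (!invariant_holds) {
		journal_abort(j, -EROFS);
		return -EROFS;
	}
	/* ...normal processing... */
	return 0;
}

int main(void)
{
	struct journal j = { .aborted = false };

	printf("clean case:   %d\n", get_create_access(&j, true));
	printf("corrupt case: %d\n", get_create_access(&j, false));
	printf("journal aborted: %s\n", j.aborted ? "yes" : "no");
	return 0;
}
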
[openeuler:OLK-6.6 13/13] drivers/ub/ubus/ubus_config.c:191:6: warning: no previous prototype for 'ub_sync_cfg_rsp_handle'
by kernel test robot 26 Dec '25

tree: https://gitee.com/openeuler/kernel.git OLK-6.6 head: 52a3b0aa13aa2325ca2caf528fbcaba06ad3bab5 commit: 280895301d3bc3507606cbd2e3cdccba75a8dcdc [13/13] ub:ubus: Support Ubus read/write configuration functions config: arm64-randconfig-004-20251226 (https://download.01.org/0day-ci/archive/20251226/202512260914.fC4Pfn5g-lkp@…) compiler: aarch64-linux-gcc (GCC) 13.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251226/202512260914.fC4Pfn5g-lkp@…) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp(a)intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202512260914.fC4Pfn5g-lkp@intel.com/ All warnings (new ones prefixed by >>): >> drivers/ub/ubus/ubus_config.c:191:6: warning: no previous prototype for 'ub_sync_cfg_rsp_handle' [-Wmissing-prototypes] 191 | void ub_sync_cfg_rsp_handle(struct cfg_msg_pld_rsp *rsp, u8 size, | ^~~~~~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:310:5: warning: no previous prototype for '__ub_cfg_read_byte' [-Wmissing-prototypes] 310 | int __ub_cfg_read_byte(struct ub_entity *uent, u64 pos, u8 *val) | ^~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:320:5: warning: no previous prototype for '__ub_cfg_read_word' [-Wmissing-prototypes] 320 | int __ub_cfg_read_word(struct ub_entity *uent, u64 pos, u16 *val) | ^~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:330:5: warning: no previous prototype for '__ub_cfg_read_dword' [-Wmissing-prototypes] 330 | int __ub_cfg_read_dword(struct ub_entity *uent, u64 pos, u32 *val) | ^~~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:340:5: warning: no previous prototype for '__ub_cfg_write_byte' [-Wmissing-prototypes] 340 | int __ub_cfg_write_byte(struct ub_entity *uent, u64 pos, u8 val) | ^~~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:350:5: warning: no previous prototype for '__ub_cfg_write_word' [-Wmissing-prototypes] 350 | int __ub_cfg_write_word(struct ub_entity *uent, u64 pos, u16 val) | ^~~~~~~~~~~~~~~~~~~ drivers/ub/ubus/ubus_config.c:360:5: warning: no previous prototype for '__ub_cfg_write_dword' [-Wmissing-prototypes] 360 | int __ub_cfg_write_dword(struct ub_entity *uent, u64 pos, u32 val) | ^~~~~~~~~~~~~~~~~~~~ Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for RESCTRL_FS Depends on [n]: MISC_FILESYSTEMS [=n] && ARCH_HAS_CPU_RESCTRL [=y] Selected by [y]: - ARM64_MPAM [=y] vim +/ub_sync_cfg_rsp_handle +191 drivers/ub/ubus/ubus_config.c 190 > 191 void ub_sync_cfg_rsp_handle(struct cfg_msg_pld_rsp *rsp, u8 size, 192 u64 pos, bool write, u32 *val) 193 { 194 #define UB_CFG_REG_SIZE 4 195 u8 pos_in_reg = pos % UB_CFG_REG_SIZE; 196 u32 read_data; 197 198 if (!write) { 199 read_data = rsp->read_data >> (pos_in_reg * BITS_PER_BYTE); 200 if (size == sizeof(u8)) 201 *(u8 *)val = read_data; 202 else if (size == sizeof(u16)) 203 *(u16 *)val = read_data; 204 else 205 *val = read_data; 206 } 207 } 208 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
[PATCH OLK-6.6] fs: ext4: change GFP_KERNEL to GFP_NOFS to avoid deadlock
by Yongjian Sun 26 Dec '25

From: chuguangqing <chuguangqing(a)inspur.com>

mainline inclusion
from mainline-v6.18-rc1
commit 1534f72dc2a11ded38b0e0268fbcc0ca24e9fd4a
category: bugfix
bugzilla: https://atomgit.com/src-openeuler/kernel/issues/12622
CVE: CVE-2025-40361
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…

--------------------------------

The parent function ext4_xattr_inode_lookup_create already uses GFP_NOFS
for memory alloction, so the function ext4_xattr_inode_cache_find should
use same gfp_flag.

Signed-off-by: chuguangqing <chuguangqing(a)inspur.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Yongjian Sun <sunyongjian1(a)huawei.com>
---
 fs/ext4/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5009fdcfe150..cd906aa08afa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1549,7 +1549,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
 	WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
 		     !(current->flags & PF_MEMALLOC_NOFS));
 
-	ea_data = kvmalloc(value_len, GFP_KERNEL);
+	ea_data = kvmalloc(value_len, GFP_NOFS);
 	if (!ea_data) {
 		mb_cache_entry_put(ea_inode_cache, ce);
 		return NULL;
--
2.39.2
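
For context, GFP_NOFS matters because a GFP_KERNEL allocation may enter direct reclaim and re-enter the filesystem while the caller already holds fs or journal state, which is the recursion the parent function guards against (note the PF_MEMALLOC_NOFS check in the hunk's context lines). A hedged kernel-style sketch of the two common ways to express the constraint, illustrative rather than the exact ext4 code:

#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/mm.h>

/* 1) Pass GFP_NOFS directly, as this patch does for the xattr cache lookup. */
static void *alloc_value_nofs(size_t value_len)
{
	return kvmalloc(value_len, GFP_NOFS);
}

/* 2) Or mark a whole region as "no fs reclaim", so even GFP_KERNEL
 *    allocations inside it behave as GFP_NOFS (the scoped API behind
 *    PF_MEMALLOC_NOFS). */
static void *alloc_value_scoped(size_t value_len)
{
	unsigned int flags = memalloc_nofs_save();
	void *p = kvmalloc(value_len, GFP_KERNEL);

	memalloc_nofs_restore(flags);
	return p;
}
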