This patch set fix CVE-2024-39496.
Christoph Hellwig (4): btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info
Filipe Manana (1): btrfs: zoned: fix use-after-free due to race with dev replace
fs/btrfs/zoned.c | 330 ++++++++++++++++++++++++----------------------- 1 file changed, 170 insertions(+), 160 deletions(-)
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v6.6-rc5 commit 15c12fcc50a1b12a747f8b6ec05cdb18c537a4d1 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0 CVE: CVE-2024-39496
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add a new zone_info structure to hold per-zone information in btrfs_load_block_group_zone_info and prepare for breaking out helpers from it.
Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/btrfs/zoned.c | 84 +++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 47 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 41a8cdce5d9f..572203aab7af 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1282,6 +1282,12 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, return ret; }
+struct zone_info { + u64 physical; + u64 capacity; + u64 alloc_offset; +}; + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; @@ -1291,12 +1297,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) struct btrfs_device *device; u64 logical = cache->start; u64 length = cache->length; + struct zone_info *zone_info = NULL; int ret; int i; unsigned int nofs_flag; - u64 *alloc_offsets = NULL; - u64 *caps = NULL; - u64 *physical = NULL; unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; @@ -1328,20 +1332,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; }
- alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); - if (!alloc_offsets) { - ret = -ENOMEM; - goto out; - } - - caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS); - if (!caps) { - ret = -ENOMEM; - goto out; - } - - physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS); - if (!physical) { + zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); + if (!zone_info) { ret = -ENOMEM; goto out; } @@ -1353,20 +1345,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) }
for (i = 0; i < map->num_stripes; i++) { + struct zone_info *info = &zone_info[i]; bool is_sequential; struct blk_zone zone; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0;
device = map->stripes[i].dev; - physical[i] = map->stripes[i].physical; + info->physical = map->stripes[i].physical;
if (device->bdev == NULL) { - alloc_offsets[i] = WP_MISSING_DEV; + info->alloc_offset = WP_MISSING_DEV; continue; }
- is_sequential = btrfs_dev_is_sequential(device, physical[i]); + is_sequential = btrfs_dev_is_sequential(device, info->physical); if (is_sequential) num_sequential++; else @@ -1380,7 +1373,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) __set_bit(i, active);
if (!is_sequential) { - alloc_offsets[i] = WP_CONVENTIONAL; + info->alloc_offset = WP_CONVENTIONAL; continue; }
@@ -1388,25 +1381,25 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) * This zone will be used for allocation, so mark this zone * non-empty. */ - btrfs_dev_clear_zone_empty(device, physical[i]); + btrfs_dev_clear_zone_empty(device, info->physical);
down_read(&dev_replace->rwsem); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) - btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]); + btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); up_read(&dev_replace->rwsem);
/* * The group is mapped to a sequential zone. Get the zone write * pointer to determine the allocation offset within the zone. */ - WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size)); + WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); nofs_flag = memalloc_nofs_save(); - ret = btrfs_get_dev_zone(device, physical[i], &zone); + ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); if (ret == -EIO || ret == -EOPNOTSUPP) { ret = 0; - alloc_offsets[i] = WP_MISSING_DEV; + info->alloc_offset = WP_MISSING_DEV; continue; } else if (ret) { goto out; @@ -1421,27 +1414,26 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; }
- caps[i] = (zone.capacity << SECTOR_SHIFT); + info->capacity = (zone.capacity << SECTOR_SHIFT);
switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: btrfs_err(fs_info, "zoned: offline/readonly zone %llu on device %s (devid %llu)", - physical[i] >> device->zone_info->zone_size_shift, + info->physical >> device->zone_info->zone_size_shift, rcu_str_deref(device->name), device->devid); - alloc_offsets[i] = WP_MISSING_DEV; + info->alloc_offset = WP_MISSING_DEV; break; case BLK_ZONE_COND_EMPTY: - alloc_offsets[i] = 0; + info->alloc_offset = 0; break; case BLK_ZONE_COND_FULL: - alloc_offsets[i] = caps[i]; + info->alloc_offset = info->capacity; break; default: /* Partially used zone */ - alloc_offsets[i] = - ((zone.wp - zone.start) << SECTOR_SHIFT); + info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); __set_bit(i, active); break; } @@ -1468,15 +1460,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { case 0: /* single */ - if (alloc_offsets[0] == WP_MISSING_DEV) { + if (zone_info[0].alloc_offset == WP_MISSING_DEV) { btrfs_err(fs_info, "zoned: cannot recover write pointer for zone %llu", - physical[0]); + zone_info[0].physical); ret = -EIO; goto out; } - cache->alloc_offset = alloc_offsets[0]; - cache->zone_capacity = caps[0]; + cache->alloc_offset = zone_info[0].alloc_offset; + cache->zone_capacity = zone_info[0].capacity; if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); break; @@ -1486,21 +1478,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = -EINVAL; goto out; } - if (alloc_offsets[0] == WP_MISSING_DEV) { + if (zone_info[0].alloc_offset == WP_MISSING_DEV) { btrfs_err(fs_info, "zoned: cannot recover write pointer for zone %llu", - physical[0]); + zone_info[0].physical); ret = -EIO; goto out; } - if (alloc_offsets[1] == WP_MISSING_DEV) { + if (zone_info[1].alloc_offset == WP_MISSING_DEV) { btrfs_err(fs_info, "zoned: cannot recover write pointer for zone %llu", - physical[1]); + zone_info[1].physical); ret = -EIO; goto out; } - if (alloc_offsets[0] != alloc_offsets[1]) { + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); ret = -EIO; @@ -1516,8 +1508,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); } - cache->alloc_offset = alloc_offsets[0]; - cache->zone_capacity = min(caps[0], caps[1]); + cache->alloc_offset = zone_info[0].alloc_offset; + cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID0: @@ -1570,9 +1562,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) cache->physical_map = NULL; } bitmap_free(active); - kfree(physical); - kfree(caps); - kfree(alloc_offsets); + kfree(zone_info); free_extent_map(em);
return ret;
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v6.6-rc5 commit 09a46725cc84165af452d978a3532d6b97a28796 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0 CVE: CVE-2024-39496
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Split out a helper for the body of the per-zone loop in btrfs_load_block_group_zone_info to make the function easier to read and modify.
Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/btrfs/zoned.c | 184 +++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 92 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 572203aab7af..7dbb81fa9edd 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1288,19 +1288,103 @@ struct zone_info { u64 alloc_offset; };
+static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, + struct zone_info *info, unsigned long *active, + struct map_lookup *map) +{ + struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; + struct btrfs_device *device = map->stripes[zone_idx].dev; + int dev_replace_is_ongoing = 0; + unsigned int nofs_flag; + struct blk_zone zone; + int ret; + + info->physical = map->stripes[zone_idx].physical; + + if (!device->bdev) { + info->alloc_offset = WP_MISSING_DEV; + return 0; + } + + /* Consider a zone as active if we can allow any number of active zones. */ + if (!device->zone_info->max_active_zones) + __set_bit(zone_idx, active); + + if (!btrfs_dev_is_sequential(device, info->physical)) { + info->alloc_offset = WP_CONVENTIONAL; + return 0; + } + + /* This zone will be used for allocation, so mark this zone non-empty. */ + btrfs_dev_clear_zone_empty(device, info->physical); + + down_read(&dev_replace->rwsem); + dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) + btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); + up_read(&dev_replace->rwsem); + + /* + * The group is mapped to a sequential zone. Get the zone write pointer + * to determine the allocation offset within the zone. + */ + WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); + nofs_flag = memalloc_nofs_save(); + ret = btrfs_get_dev_zone(device, info->physical, &zone); + memalloc_nofs_restore(nofs_flag); + if (ret) { + if (ret != -EIO && ret != -EOPNOTSUPP) + return ret; + info->alloc_offset = WP_MISSING_DEV; + return 0; + } + + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { + btrfs_err_in_rcu(fs_info, + "zoned: unexpected conventional zone %llu on device %s (devid %llu)", + zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), + device->devid); + return -EIO; + } + + info->capacity = (zone.capacity << SECTOR_SHIFT); + + switch (zone.cond) { + case BLK_ZONE_COND_OFFLINE: + case BLK_ZONE_COND_READONLY: + btrfs_err(fs_info, + "zoned: offline/readonly zone %llu on device %s (devid %llu)", + (info->physical >> device->zone_info->zone_size_shift), + rcu_str_deref(device->name), device->devid); + info->alloc_offset = WP_MISSING_DEV; + break; + case BLK_ZONE_COND_EMPTY: + info->alloc_offset = 0; + break; + case BLK_ZONE_COND_FULL: + info->alloc_offset = info->capacity; + break; + default: + /* Partially used zone. */ + info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); + __set_bit(zone_idx, active); + break; + } + + return 0; +} + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; - struct btrfs_device *device; u64 logical = cache->start; u64 length = cache->length; struct zone_info *zone_info = NULL; int ret; int i; - unsigned int nofs_flag; unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; @@ -1345,98 +1429,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) }
for (i = 0; i < map->num_stripes; i++) { - struct zone_info *info = &zone_info[i]; - bool is_sequential; - struct blk_zone zone; - struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - int dev_replace_is_ongoing = 0; - - device = map->stripes[i].dev; - info->physical = map->stripes[i].physical; - - if (device->bdev == NULL) { - info->alloc_offset = WP_MISSING_DEV; - continue; - } - - is_sequential = btrfs_dev_is_sequential(device, info->physical); - if (is_sequential) - num_sequential++; - else - num_conventional++; - - /* - * Consider a zone as active if we can allow any number of - * active zones. - */ - if (!device->zone_info->max_active_zones) - __set_bit(i, active); - - if (!is_sequential) { - info->alloc_offset = WP_CONVENTIONAL; - continue; - } - - /* - * This zone will be used for allocation, so mark this zone - * non-empty. - */ - btrfs_dev_clear_zone_empty(device, info->physical); - - down_read(&dev_replace->rwsem); - dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) - btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); - up_read(&dev_replace->rwsem); - - /* - * The group is mapped to a sequential zone. Get the zone write - * pointer to determine the allocation offset within the zone. - */ - WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); - nofs_flag = memalloc_nofs_save(); - ret = btrfs_get_dev_zone(device, info->physical, &zone); - memalloc_nofs_restore(nofs_flag); - if (ret == -EIO || ret == -EOPNOTSUPP) { - ret = 0; - info->alloc_offset = WP_MISSING_DEV; - continue; - } else if (ret) { - goto out; - } - - if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { - btrfs_err_in_rcu(fs_info, - "zoned: unexpected conventional zone %llu on device %s (devid %llu)", - zone.start << SECTOR_SHIFT, - rcu_str_deref(device->name), device->devid); - ret = -EIO; + ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); + if (ret) goto out; - } - - info->capacity = (zone.capacity << SECTOR_SHIFT);
- switch (zone.cond) { - case BLK_ZONE_COND_OFFLINE: - case BLK_ZONE_COND_READONLY: - btrfs_err(fs_info, - "zoned: offline/readonly zone %llu on device %s (devid %llu)", - info->physical >> device->zone_info->zone_size_shift, - rcu_str_deref(device->name), device->devid); - info->alloc_offset = WP_MISSING_DEV; - break; - case BLK_ZONE_COND_EMPTY: - info->alloc_offset = 0; - break; - case BLK_ZONE_COND_FULL: - info->alloc_offset = info->capacity; - break; - default: - /* Partially used zone */ - info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); - __set_bit(i, active); - break; - } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + num_conventional++; + else + num_sequential++; }
if (num_sequential > 0)
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v6.6-rc5 commit 9e0e3e74dc6928a0956f4e27e24d473c65887e96 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0 CVE: CVE-2024-39496
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Split the code handling a type single block group from btrfs_load_block_group_zone_info to make the code more readable.
Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/btrfs/zoned.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 7dbb81fa9edd..3976d06b4601 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1374,6 +1374,24 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, return 0; }
+static int btrfs_load_block_group_single(struct btrfs_block_group *bg, + struct zone_info *info, + unsigned long *active) +{ + if (info->alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + info->physical); + return -EIO; + } + + bg->alloc_offset = info->alloc_offset; + bg->zone_capacity = info->capacity; + if (test_bit(0, active)) + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); + return 0; +} + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; @@ -1460,17 +1478,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { case 0: /* single */ - if (zone_info[0].alloc_offset == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - zone_info[0].physical); - ret = -EIO; - goto out; - } - cache->alloc_offset = zone_info[0].alloc_offset; - cache->zone_capacity = zone_info[0].capacity; - if (test_bit(0, active)) - set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); + ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: if (map->type & BTRFS_BLOCK_GROUP_DATA) {
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v6.6-rc5 commit 87463f7e0250d471fac41e7c9c45ae21d83b5f85 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0 CVE: CVE-2024-39496
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Split the code handling a type DUP block group from btrfs_load_block_group_zone_info to make the code more readable.
Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/btrfs/zoned.c | 79 +++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 37 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3976d06b4601..694a2cf36bf8 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1392,6 +1392,47 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg, return 0; }
+static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, + struct map_lookup *map, + struct zone_info *zone_info, + unsigned long *active) +{ + if (map->type & BTRFS_BLOCK_GROUP_DATA) { + btrfs_err(bg->fs_info, + "zoned: profile DUP not yet supported on data bg"); + return -EINVAL; + } + + if (zone_info[0].alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + zone_info[0].physical); + return -EIO; + } + if (zone_info[1].alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + zone_info[1].physical); + return -EIO; + } + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { + btrfs_err(bg->fs_info, + "zoned: write pointer offset mismatch of zones in DUP profile"); + return -EIO; + } + + if (test_bit(0, active) != test_bit(1, active)) { + if (!btrfs_zone_activate(bg)) + return -EIO; + } else if (test_bit(0, active)) { + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); + } + + bg->alloc_offset = zone_info[0].alloc_offset; + bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); + return 0; +} + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; @@ -1481,43 +1522,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: - if (map->type & BTRFS_BLOCK_GROUP_DATA) { - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); - ret = -EINVAL; - goto out; - } - if (zone_info[0].alloc_offset == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - zone_info[0].physical); - ret = -EIO; - goto out; - } - if (zone_info[1].alloc_offset == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - zone_info[1].physical); - ret = -EIO; - goto out; - } - if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { - btrfs_err(fs_info, - "zoned: write pointer offset mismatch of zones in DUP profile"); - ret = -EIO; - goto out; - } - if (test_bit(0, active) != test_bit(1, active)) { - if (!btrfs_zone_activate(cache)) { - ret = -EIO; - goto out; - } - } else { - if (test_bit(0, active)) - set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, - &cache->runtime_flags); - } - cache->alloc_offset = zone_info[0].alloc_offset; - cache->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID0:
From: Filipe Manana fdmanana@suse.com
stable inclusion from stable-v6.6.34 commit 092571ef9a812566c8f2c9038d9c2a64c49788d6 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0 CVE: CVE-2024-39496
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=t...
--------------------------------
commit 0090d6e1b210551e63cf43958dc7a1ec942cdde9 upstream.
While loading a zone's info during creation of a block group, we can race with a device replace operation and then trigger a use-after-free on the device that was just replaced (source device of the replace operation).
This happens because at btrfs_load_zone_info() we extract a device from the chunk map into a local variable and then use the device while not under the protection of the device replace rwsem. So if there's a device replace operation happening when we extract the device and that device is the source of the replace operation, we will trigger a use-after-free if before we finish using the device the replace operation finishes and frees the device.
Fix this by enlarging the critical section under the protection of the device replace rwsem so that all uses of the device are done inside the critical section.
CC: stable@vger.kernel.org # 6.1.x: 15c12fcc50a1: btrfs: zoned: introduce a zone_info struct in btrfs_load_block_group_zone_info CC: stable@vger.kernel.org # 6.1.x: 09a46725cc84: btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info CC: stable@vger.kernel.org # 6.1.x: 9e0e3e74dc69: btrfs: zoned: factor out single bg handling from btrfs_load_block_group_zone_info CC: stable@vger.kernel.org # 6.1.x: 87463f7e0250: btrfs: zoned: factor out DUP bg handling from btrfs_load_block_group_zone_info CC: stable@vger.kernel.org # 6.1.x Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Filipe Manana fdmanana@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Long Li leo.lilong@huawei.com --- fs/btrfs/zoned.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 694a2cf36bf8..2784f6cb4482 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1293,7 +1293,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct map_lookup *map) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - struct btrfs_device *device = map->stripes[zone_idx].dev; + struct btrfs_device *device; int dev_replace_is_ongoing = 0; unsigned int nofs_flag; struct blk_zone zone; @@ -1301,7 +1301,11 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
info->physical = map->stripes[zone_idx].physical;
+ down_read(&dev_replace->rwsem); + device = map->stripes[zone_idx].dev; + if (!device->bdev) { + up_read(&dev_replace->rwsem); info->alloc_offset = WP_MISSING_DEV; return 0; } @@ -1311,6 +1315,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, __set_bit(zone_idx, active);
if (!btrfs_dev_is_sequential(device, info->physical)) { + up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; return 0; } @@ -1318,11 +1323,9 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical);
- down_read(&dev_replace->rwsem); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); - up_read(&dev_replace->rwsem);
/* * The group is mapped to a sequential zone. Get the zone write pointer @@ -1333,6 +1336,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); if (ret) { + up_read(&dev_replace->rwsem); if (ret != -EIO && ret != -EOPNOTSUPP) return ret; info->alloc_offset = WP_MISSING_DEV; @@ -1344,6 +1348,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, "zoned: unexpected conventional zone %llu on device %s (devid %llu)", zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), device->devid); + up_read(&dev_replace->rwsem); return -EIO; }
@@ -1371,6 +1376,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, break; }
+ up_read(&dev_replace->rwsem); + return 0; }
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/10044 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/J...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/10044 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/J...