From: Li Lingfeng lilingfeng3@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I53Q6M CVE: NA
---------------------------
Currently, we don't have an easy way to tell whether a corrupted file system was damaged by data written through the raw block device. It is risky to open a block device exclusively when it has already been opened for write by some other process, since this may lead to data corruption. This patch counts the write openers of a block device and prints a hint identifying the exclusive opener (and its parent process) if that happens.
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/block_dev.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 61 insertions(+)
diff --git a/fs/block_dev.c b/fs/block_dev.c index 58be97f412fd..6adb17cc7dbb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -35,6 +35,7 @@ #include <linux/falloc.h> #include <linux/uaccess.h> #include <linux/suspend.h> +#include <linux/sched/task.h> #include "internal.h"
struct bdev_inode { @@ -1538,6 +1539,39 @@ static void bdev_disk_changed(struct block_device *bdev, bool invalidate) } }
+static void blkdev_dump_conflict_opener(struct block_device *bdev, char *msg) +{ + char name[BDEVNAME_SIZE]; + struct task_struct *p = NULL; + char comm_buf[TASK_COMM_LEN]; + pid_t p_pid; + + rcu_read_lock(); + p = rcu_dereference(current->real_parent); + get_task_comm(comm_buf, p); + p_pid = p->pid; + rcu_read_unlock(); + + pr_info_ratelimited("%s %s. current [%d %s]. parent [%d %s]\n", + msg, bdevname(bdev, name), + current->pid, current->comm, p_pid, comm_buf); +} + +static bool is_conflict_excl_open(struct block_device *bdev, + struct block_device *whole, fmode_t mode) +{ + if (bdev->bd_holders) + return false; + + if (bdev->bd_write_openers > ((mode & FMODE_WRITE) ? 1 : 0)) + return true; + + if (bdev == whole) + return !!bdev->bd_part_write_openers; + + return !!whole->bd_write_openers; +} + /* * bd_mutex locking: * @@ -1666,6 +1700,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_openers++; if (for_part) bdev->bd_part_count++; + + if (!for_part && (mode & FMODE_WRITE)) { + spin_lock(&bdev_lock); + bdev->bd_write_openers++; + if (bdev->bd_contains != bdev) + bdev->bd_contains->bd_part_write_openers++; + spin_unlock(&bdev_lock); + } + mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); /* only one opener holds refs to the module and disk */ @@ -1732,6 +1775,14 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) /* finish claiming */ mutex_lock(&bdev->bd_mutex); spin_lock(&bdev_lock); + /* + * Open an write opened block device exclusively, the + * writing process may probability corrupt the device, + * such as a mounted file system, give a hint here. + */ + if (!res && is_conflict_excl_open(bdev, whole, mode)) + blkdev_dump_conflict_opener(bdev, + "VFS: Open an write opened block device exclusively");
if (!res) { BUG_ON(!bd_may_claim(bdev, whole, holder)); @@ -1907,6 +1958,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) if (for_part) bdev->bd_part_count--;
+ if (!for_part && (mode & FMODE_WRITE)) { + spin_lock(&bdev_lock); + bdev->bd_write_openers--; + if (bdev->bd_contains != bdev) + bdev->bd_contains->bd_part_write_openers--; + spin_unlock(&bdev_lock); + } + if (!--bdev->bd_openers) { WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3892a5793c62..a8e36afa66b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -465,6 +465,8 @@ struct block_device { struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ + int bd_write_openers; + int bd_part_write_openers; void * bd_claiming; void * bd_holder; int bd_holders;