From: Hou Tao <houtao1(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 167067
CVE: NA
--------------------------------
In __blkdev_direct_IO_simple(), when bi_private is NULL, it assumes
bi_css must be NULL as shown below:
CPU 1: CPU 2:
__blkdev_direct_IO_simple
submit_bio
bio_endio
bio_uninit(bio)
css_put(bi_css)
bi_css = NULL
set_current_state(TASK_UNINTERRUPTIBLE)
bio->bi_end_io
blkdev_bio_end_io_simple
bio->bi_private = NULL
// bi_private is NULL
READ_ONCE(bio->bi_private)
wake_up_process
smp_mb__after_spinlock
bio_uninit(bio)
// read bi_css as non-NULL
css_put(bi_css)
Because there is no memory barrier between the reading and the writing of
these two variables, the assumption is wrong on machines with a weak
memory model (e.g. arm64). bi_css will be put twice, leading to the
following warning:
percpu_ref_switch_to_atomic_rcu: percpu ref (css_release) <= 0 (-3) after switching to atomic
There is a similar problem in __blkdev_direct_IO() which occurs between
dio->waiter and bio.bi_status.
Fix it by adding an smp_rmb() between the reads of the two variables, and
a corresponding smp_wmb() between the writes.
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
fs/block_dev.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 786d105692e85..30dd7b19bd2e3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -195,6 +195,11 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
{
struct task_struct *waiter = bio->bi_private;
+ /*
+ * Paired with smp_rmb() after reading bio->bi_private
+ * in __blkdev_direct_IO_simple()
+ */
+ smp_wmb();
WRITE_ONCE(bio->bi_private, NULL);
wake_up_process(waiter);
}
@@ -251,8 +256,14 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
qc = submit_bio(&bio);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
+ if (!READ_ONCE(bio.bi_private)) {
+ /*
+ * Paired with smp_wmb() in
+ * blkdev_bio_end_io_simple()
+ */
+ smp_rmb();
break;
+ }
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc))
io_schedule();
@@ -317,6 +328,12 @@ static void blkdev_bio_end_io(struct bio *bio)
} else {
struct task_struct *waiter = dio->waiter;
+ if (!dio->multi_bio)
+ /*
+ * Paired with smp_rmb() after reading
+ * dio->waiter in __blkdev_direct_IO()
+ */
+ smp_wmb();
WRITE_ONCE(dio->waiter, NULL);
wake_up_process(waiter);
}
@@ -417,8 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(dio->waiter))
+ if (!READ_ONCE(dio->waiter)) {
+ /* Paired with smp_wmb() in blkdev_bio_end_io() */
+ smp_rmb();
break;
+ }
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc))
--
2.25.1