From: Mikulas Patocka mpatocka@redhat.com
commit aa9509209c5ac2f0b35d01a922bf9ae072d0c2fc upstream.
When committing state, the function writecache_flush does the following: 1. write metadata (writecache_commit_flushed) 2. flush disk cache (writecache_commit_flushed) 3. wait for data writes to complete (writecache_wait_for_ios) 4. increase superblock seq_count 5. write the superblock 6. flush disk cache
It may happen that at step 3, when we wait for some write to finish, the disk may report the write as finished, but the write only hit the disk cache and it is not yet stored in persistent storage. At step 5 we write the superblock - it may happen that the superblock is written before the write that we waited for in step 3. If the machine crashes, it may result in incorrect data being returned after reboot.
In order to fix the bug, we must swap steps 2 and 3 in the above sequence, so that we first wait for writes to complete and then flush the disk cache.
Fixes: 48debafe4f2f ("dm: add writecache target") Cc: stable@vger.kernel.org # 4.18+ Signed-off-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/dm-writecache.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-)
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 503c426..820c2e0 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -447,7 +447,13 @@ static void writecache_notify_io(unsigned long error, void *context) complete(&endio->c); }
-static void ssd_commit_flushed(struct dm_writecache *wc) +static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) +{ + wait_event(wc->bio_in_progress_wait[direction], + !atomic_read(&wc->bio_in_progress[direction])); +} + +static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { struct dm_io_region region; struct dm_io_request req; @@ -493,17 +499,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc) writecache_notify_io(0, &endio); wait_for_completion_io(&endio.c);
+ if (wait_for_ios) + writecache_wait_for_ios(wc, WRITE); + writecache_disk_flush(wc, wc->ssd_dev);
memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); }
-static void writecache_commit_flushed(struct dm_writecache *wc) +static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) wmb(); else - ssd_commit_flushed(wc); + ssd_commit_flushed(wc, wait_for_ios); }
static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) @@ -527,12 +536,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) writecache_error(wc, r, "error flushing metadata: %d", r); }
-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) -{ - wait_event(wc->bio_in_progress_wait[direction], - !atomic_read(&wc->bio_in_progress[direction])); -} - #define WFE_RETURN_FOLLOWING 1 #define WFE_LOWEST_SEQ 2
@@ -730,14 +733,12 @@ static void writecache_flush(struct dm_writecache *wc) e = e2; cond_resched(); } - writecache_commit_flushed(wc); - - writecache_wait_for_ios(wc, WRITE); + writecache_commit_flushed(wc, true);
wc->seq_count++; pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false);
wc->overwrote_committed = false;
@@ -761,7 +762,7 @@ static void writecache_flush(struct dm_writecache *wc) }
if (need_flush_after_free) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); }
static void writecache_flush_work(struct work_struct *work) @@ -814,7 +815,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ }
if (discarded_something) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); }
static bool writecache_wait_for_writeback(struct dm_writecache *wc) @@ -963,7 +964,7 @@ static void writecache_resume(struct dm_target *ti)
if (need_flush) { writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); }
wc_unlock(wc); @@ -1347,7 +1348,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head * wc->writeback_size--; n_walked++; if (unlikely(n_walked >= ENDIO_LATENCY)) { - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc_unlock(wc); wc_lock(wc); n_walked = 0; @@ -1428,7 +1429,7 @@ static int writecache_endio_thread(void *data) writecache_wait_for_ios(wc, READ); }
- writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false);
wc_unlock(wc); } @@ -1759,10 +1760,10 @@ static int init_memory(struct dm_writecache *wc) write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false);
return 0; }