Prevent potential lock starvation caused by readers holding the read lock.
Peter Zijlstra (1):
  locking/rwsem: Better collate rwsem_read_trylock()

Waiman Long (2):
  locking/rwsem: Pass the current atomic count to rwsem_down_read_slowpath()
  locking/rwsem: Prevent potential lock starvation

 kernel/locking/rwsem.c | 56 ++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 19 deletions(-)
From: Peter Zijlstra <peterz@infradead.org>
mainline inclusion
from mainline-v5.11-rc1
commit 3379116a0ca965b00e6522c7ea3f16c9dbd8f9f9
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A0N9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
All users of rwsem_read_trylock() do rwsem_set_reader_owned(sem) on success; move it into rwsem_read_trylock() proper.
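For reference, the consolidated helper, as reconstructed from the diff below, ends up looking roughly like this (a sketch, not the authoritative source):

	static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
	{
		long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

		if (WARN_ON_ONCE(cnt < 0))
			rwsem_set_nonspinnable(sem);

		/* Fast path succeeded: mark the rwsem as reader-owned here. */
		if (!(cnt & RWSEM_READ_FAILED_MASK)) {
			rwsem_set_reader_owned(sem);
			return true;
		}

		return false;
	}

so the __down_read*() callers no longer need an else branch that calls rwsem_set_reader_owned() themselves.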
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201207090243.GE3040@hirez.programming.kicks-ass....
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/locking/rwsem.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cc5cc889b5b7..7bf45b0a1b1d 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -273,9 +273,16 @@ static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
 static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
 {
 	long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
+
 	if (WARN_ON_ONCE(cnt < 0))
 		rwsem_set_nonspinnable(sem);
-	return !(cnt & RWSEM_READ_FAILED_MASK);
+
+	if (!(cnt & RWSEM_READ_FAILED_MASK)) {
+		rwsem_set_reader_owned(sem);
+		return true;
+	}
+
+	return false;
 }
 /*
@@ -1340,8 +1347,6 @@ static inline void __down_read(struct rw_semaphore *sem)
 	if (!rwsem_read_trylock(sem)) {
 		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
-	} else {
-		rwsem_set_reader_owned(sem);
 	}
 }
@@ -1351,8 +1356,6 @@ static inline int __down_read_interruptible(struct rw_semaphore *sem)
 		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
-	} else {
-		rwsem_set_reader_owned(sem);
 	}
 	return 0;
 }

@@ -1363,8 +1366,6 @@ static inline int __down_read_killable(struct rw_semaphore *sem)
 		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
-	} else {
-		rwsem_set_reader_owned(sem);
 	}
 	return 0;
 }
From: Waiman Long <longman@redhat.com>
mainline inclusion
from mainline-v5.11-rc1
commit c8fe8b0564388f41147326f31e4587171aacccd4
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A0N9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The atomic count value right after reader count increment can be useful to determine the rwsem state at trylock time. So the count value is passed down to rwsem_down_read_slowpath() to be used when appropriate.
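For context, the value returned by the atomic add encodes the whole rwsem state at that instant. A minimal sketch of the bit layout, assuming the constants defined in kernel/locking/rwsem.c around v5.11 (shown here for illustration only; check the actual source):

	/* Illustrative only: low bits are flags, the reader count sits above them. */
	#define RWSEM_WRITER_LOCKED	(1UL << 0)	/* a writer holds the lock */
	#define RWSEM_FLAG_WAITERS	(1UL << 1)	/* waiters are queued */
	#define RWSEM_FLAG_HANDOFF	(1UL << 2)	/* lock handoff requested */
	#define RWSEM_READER_SHIFT	8
	#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)

	/* Number of readers already accounted for in a sampled count value: */
	long readers = count >> RWSEM_READER_SHIFT;

Passing the sampled value down avoids a second read of sem->count in the slowpath and lets it reason about the state that existed at trylock time.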
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Link: https://lkml.kernel.org/r/20201121041416.12285-2-longman@redhat.com
Conflicts:
	kernel/locking/rwsem.c
[yyl: No functional change.]
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/locking/rwsem.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 7bf45b0a1b1d..2c4994e58f73 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -270,14 +270,14 @@ static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
 			  owner | RWSEM_NONSPINNABLE));
 }
-static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
 {
-	long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
+	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
-	if (WARN_ON_ONCE(cnt < 0))
+	if (WARN_ON_ONCE(*cntp < 0))
 		rwsem_set_nonspinnable(sem);
-	if (!(cnt & RWSEM_READ_FAILED_MASK)) {
+	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
 		rwsem_set_reader_owned(sem);
 		return true;
 	}
@@ -996,9 +996,9 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
  * Wait for the read lock to be granted
  */
 static struct rw_semaphore __sched *
-rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
+rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, int state)
 {
-	long count, adjustment = -RWSEM_READER_BIAS;
+	long adjustment = -RWSEM_READER_BIAS;
 	struct rwsem_waiter waiter;
 	DEFINE_WAKE_Q(wake_q);
 	bool wake = false;
@@ -1344,16 +1344,20 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
  */
 static inline void __down_read(struct rw_semaphore *sem)
 {
-	if (!rwsem_read_trylock(sem)) {
-		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
+	long count;
+
+	if (!rwsem_read_trylock(sem, &count)) {
+		rwsem_down_read_slowpath(sem, count, TASK_UNINTERRUPTIBLE);
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	}
 }
 static inline int __down_read_interruptible(struct rw_semaphore *sem)
 {
-	if (!rwsem_read_trylock(sem)) {
-		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))
+	long count;
+
+	if (!rwsem_read_trylock(sem, &count)) {
+		if (IS_ERR(rwsem_down_read_slowpath(sem, count, TASK_INTERRUPTIBLE)))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	}
@@ -1362,8 +1366,10 @@ static inline int __down_read_interruptible(struct rw_semaphore *sem)
 static inline int __down_read_killable(struct rw_semaphore *sem)
 {
-	if (!rwsem_read_trylock(sem)) {
-		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
+	long count;
+
+	if (!rwsem_read_trylock(sem, &count)) {
+		if (IS_ERR(rwsem_down_read_slowpath(sem, count, TASK_KILLABLE)))
 			return -EINTR;
 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
 	}
From: Waiman Long <longman@redhat.com>
mainline inclusion
from mainline-v5.11-rc1
commit 2f06f702925b512a95b95dca3855549c047eef58
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A0N9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The lock handoff bit was added in commit 4f23dbc1e657 ("locking/rwsem: Implement lock handoff to prevent lock starvation") to avoid lock starvation. However, allowing readers to do optimistic spinning does introduce an unlikely scenario where lock starvation can happen.
The lock handoff bit may only be set when a waiter is being woken up. In the case of reader unlock, wakeup happens only when the reader count reaches 0. If there is a continuous stream of incoming readers acquiring the read lock via optimistic spinning, it is possible that the reader count never reaches 0, so the handoff bit will never be asserted.
One way to prevent this scenario from happening is to disallow optimistic spinning if the rwsem is currently owned by readers. If the previous or current owner is a writer, optimistic spinning will be allowed.
If the previous owner is a reader but the reader count has reached 0 before, a wakeup should have been issued. So the handoff mechanism will be kicked in to prevent lock starvation. As a result, it should be OK to do optimistic spinning in this case.
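Condensed into a sketch of the check added at the top of rwsem_down_read_slowpath() (see the diff below), the decision is:

	owner = atomic_long_read(&sem->owner);
	if ((owner & RWSEM_READER_OWNED) && (rcnt > 1) &&
	    !(count & RWSEM_WRITER_LOCKED))
		goto queue;	/* reader-owned: queue up instead of spinning */

That is, optimistic spinning is skipped only when the lock is reader-owned, more than one reader is already reflected in the sampled count, and no writer holds the lock.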
This patch may have some impact on reader performance, as it reduces reader optimistic spinning, especially if the lock critical sections are short and the number of contending readers is small.
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Link: https://lkml.kernel.org/r/20201121041416.12285-3-longman@redhat.com
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/locking/rwsem.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2c4994e58f73..976b20b2dea7 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -998,16 +998,27 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
 static struct rw_semaphore __sched *
 rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, int state)
 {
-	long adjustment = -RWSEM_READER_BIAS;
+	long owner, adjustment = -RWSEM_READER_BIAS;
+	long rcnt = (count >> RWSEM_READER_SHIFT);
 	struct rwsem_waiter waiter;
 	DEFINE_WAKE_Q(wake_q);
 	bool wake = false;
+	/*
+	 * To prevent a constant stream of readers from starving a sleeping
+	 * waiter, don't attempt optimistic spinning if the lock is currently
+	 * owned by readers.
+	 */
+	owner = atomic_long_read(&sem->owner);
+	if ((owner & RWSEM_READER_OWNED) && (rcnt > 1) &&
+	    !(count & RWSEM_WRITER_LOCKED))
+		goto queue;
+
 	/*
 	 * Save the current read-owner of rwsem, if available, and the
 	 * reader nonspinnable bit.
 	 */
-	waiter.last_rowner = atomic_long_read(&sem->owner);
+	waiter.last_rowner = owner;
 	if (!(waiter.last_rowner & RWSEM_READER_OWNED))
 		waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/947
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/YN...