From: Jonas Oberhauser <jonas.oberhauser@huawei.com>
This update fixes and simplifies the memory model: it removes redundancies, makes the semantics of seq-cst explicit, makes ppo a subset of po and locally computable, eliminates several spurious bugs in qspinlock and the litmus tests, and ensures correct compilation to the official Power model.
These issues were detected by Jonas Oberhauser, Viktor Vafeiadis, and the authors of "Verifying and Optimizing Compact NUMA-Aware Locks on Weak Memory Models"; the solutions were proposed by Jonas Oberhauser and Viktor Vafeiadis.
Please refer to this link on the web: https://static.sched.com/hosted_files/osseu2022/e1/oss-eu22-jonas.pdf
Signed-off-by: Jonas Oberhauser <jonas.oberhauser@huawei.com>
Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
---
 tools/memory-model/linux-kernel.bell |   9 ++--
 tools/memory-model/linux-kernel.cat  | 100 +++++++++++++++++++----------------
 tools/memory-model/linux-kernel.def  |  34 ++++++------
 3 files changed, 77 insertions(+), 66 deletions(-)
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 5be86b1..880b28b 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -4,6 +4,8 @@ * Copyright (C) 2016 Luc Maranget luc.maranget@inria.fr for Inria * Copyright (C) 2017 Alan Stern stern@rowland.harvard.edu, * Andrea Parri parri.andrea@gmail.com + * Copyright (C) 2022 Jonas Oberhauser jonas.oberhauser@huawei.com + * Viktor Vafeiadis viktor@mpi-sws.org * * An earlier version of this file appeared in the companion webpage for * "Frightening small children and disconcerting grown-ups: Concurrency @@ -16,10 +18,11 @@ enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) || 'release (*smp_store_release*) || 'acquire (*smp_load_acquire*) || + 'seq-cst (*smp_load, xchg...*) || 'noreturn (* R of non-return RMW *) -instructions R[{'once,'acquire,'noreturn}] -instructions W[{'once,'release}] -instructions RMW[{'once,'acquire,'release}] +instructions R[{'seq-cst, 'once,'acquire,'noreturn}] +instructions W[{'seq-cst, 'once,'release}] +instructions RMW[{'seq-cst, 'once,'acquire,'release}]
enum Barriers = 'wmb (*smp_wmb*) || 'rmb (*smp_rmb*) || diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index d70315f..9f14d31 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -4,6 +4,8 @@ * Copyright (C) 2016 Luc Maranget luc.maranget@inria.fr for Inria * Copyright (C) 2017 Alan Stern stern@rowland.harvard.edu, * Andrea Parri parri.andrea@gmail.com + * Copyright (C) 2022 Jonas Oberhauser jonas.oberhauser@huawei.com, + * Viktor Vafeiadis viktor@mpi-sws.org * * An earlier version of this file appeared in the companion webpage for * "Frightening small children and disconcerting grown-ups: Concurrency @@ -25,25 +27,33 @@ include "lock.cat" (*******************)
(* Release Acquire *) + let acq-po = [Acquire] ; po ; [M] let po-rel = [M] ; po ; [Release] let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po
(* Fences *) -let R4rmb = R \ Noreturn (* Reads for which rmb works *) -let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb] -let wmb = [W] ; fencerel(Wmb) ; [W] -let mb = ([M] ; fencerel(Mb) ; [M]) | - ([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) | - ([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) | - ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) | - ([M] ; po ; [UL] ; (co | po) ; [LKW] ; - fencerel(After-unlock-lock) ; [M]) -let gp = po ; [Sync-rcu | Sync-srcu] ; po? -let strong-fence = mb | gp - +let R4rmb = R \ Noreturn +let rmb = [R4rmb] ; po ; [Rmb] ; po ; [R4rmb] +let wmb = [W] ; po ; [Wmb] ; po ; [W] + +let rmb-plain = [R \ Noreturn] ; po ; [Rmb] ; po \ ( po ; [W] ; po-loc) ; [R \ Noreturn] +let plain-wmb = [W] ; po \ (po-loc ; [W] ; po) ; [Wmb] ; po ; [W] +let strong-fence = + [M] ; po ; [Mb] ; po ; [M] + | [M] ; po ; [Before-atomic] ; po ; [RMW] ; po? ; [M] + | [M] ; po? ; [RMW] ; po ; [After-atomic] ; po ; [M] + | [M] ; po? ; [LKW] ; po ; [After-spinlock] ; po ; [M] + | [M] ; po ; [UL] ; po ; [LKR] ; po ; [After-unlock-lock] ; po ; [M] + | [M] ; po? ; [SC] ; po ; [M] + | [M] ; po ; [SC] ; po? ; [M] + | po ; [Sync-rcu | Sync-srcu] ; po? +let strong-sync = strong-fence + | ([M] ; po-unlock-lock-po ; [After-unlock-lock] ; po ; [M]) let nonrw-fence = strong-fence | po-rel | acq-po let fence = nonrw-fence | wmb | rmb + + let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu | Before-atomic | After-atomic | Acquire | Release | Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) | @@ -64,35 +74,33 @@ empty rmw & (fre ; coe) as atomic (* Instruction execution ordering *) (**********************************)
+ (* Preserved Program Order *) -let dep = addr | data -let rwdep = (dep | ctrl) ; [W] -let overwrite = co | fr -let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb) -let to-r = addr | (dep ; [Marked] ; rfi) -let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) +let dop = rmb-plain? ; (addr | data | ctrl | rfi)+; plain-wmb? +let to-w = (dop | po-loc) ; [W] +let lrs = ([W] ; po-loc ; [R]) \ (po-loc ; [W] ; po-loc) +let to-r = addr | ((addr | data) ; lrs) +let ppo = to-r | to-w | fence | (po ; [UL] ; po ; [LKR] ; po)
(* Propagation: Ordering from release operations and strong fences. *) -let A-cumul(r) = (rfe ; [Marked])? ; r -let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-lock-po) ; [Marked] -let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ; - [Marked] ; rfe? ; [Marked] +let cumul-fence = [Marked] ; (rfe? ; (po-rel ; (rfe ; rmw)*) | wmb | po-unlock-lock-po) ; [Marked] +let prop = [Marked] ; ((co | fr) & ext) ; cumul-fence* ; [Marked] ; rfe? ; [Marked] +
(* * Happens Before: Ordering from the passage of time. - * No fences needed here for prop because relation confined to one process. *) -let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked] -acyclic hb as happens-before +let hb = [Marked] ; (ppo | rfe | prop ; strong-sync) ; [Marked] +acyclic hb + +(* No fences needed here for prop because relation confined to one process. *) +irreflexive (prop ; ppo*) + + + +
-(****************************************) -(* Write and fence propagation ordering *) -(****************************************)
-(* Propagation: Each non-rf link needs a strong fence. *) -let pb = prop ; strong-fence ; hb* ; [Marked] -acyclic pb as propagation
(*******) (* RCU *) @@ -118,7 +126,7 @@ let srcu-rscsi = srcu-rscs^-1 * one but two non-rf relations, but only in conjunction with an RCU * read-side critical section. *) -let rcu-link = po? ; hb* ; pb* ; prop ; po +let rcu-link = po? ; hb* ; prop? ; po
(* * Any sequence containing at least as many grace periods as RCU read-side @@ -139,20 +147,20 @@ let rec rcu-order = rcu-gp | srcu-gp | ((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) | (rcu-order ; rcu-link ; rcu-order) let rcu-fence = po ; rcu-order ; po? -let fence = fence | rcu-fence -let strong-fence = strong-fence | rcu-fence +let fence-rcu = fence | rcu-fence +let strong-sync-rcu = strong-sync | rcu-fence
-(* rb orders instructions just as pb does *) -let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked] +(* rb orders instructions just as hb does *) +let rb = prop? ; rcu-fence ; hb* ; [Marked]
irreflexive rb as rcu
(* - * The happens-before, propagation, and rcu constraints are all + * The happens-before and rcu constraints are all * expressions of temporal ordering. They could be replaced by * a single constraint on an "executes-before" relation, xb: * - * let xb = hb | pb | rb + * let xb = hb | rb * acyclic xb as executes-before *)
@@ -166,24 +174,24 @@ let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) | flag ~empty mixed-accesses as mixed-accesses
(* Executes-before and visibility *) -let xbstar = (hb | pb | rb)* +let xbstar = (hb | rb)* let vis = cumul-fence* ; rfe? ; [Marked] ; - ((strong-fence ; [Marked] ; xbstar) | (xbstar & int)) + ((strong-sync-rcu ; [Marked] ; xbstar) | (xbstar & int))
(* Boundaries for lifetimes of plain accesses *) -let w-pre-bounded = [Marked] ; (addr | fence)? +let w-pre-bounded = [Marked] ; (addr | fence-rcu)? let r-pre-bounded = [Marked] ; (addr | nonrw-fence | ([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))? -let w-post-bounded = fence? ; [Marked] +let w-post-bounded = fence-rcu? ; [Marked] let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ; [Marked]
(* Visibility and executes-before for plain accesses *) -let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) | +let ww-vis = (strong-sync-rcu ; xbstar ; w-pre-bounded) | (w-post-bounded ; vis ; w-pre-bounded) -let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) | +let wr-vis = (strong-sync-rcu ; xbstar ; r-pre-bounded) | (w-post-bounded ; vis ; r-pre-bounded) -let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded) +let rw-xbstar = (r-post-bounded ; xbstar ; w-pre-bounded)
(* Potential races *) let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain)) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index ef0f3c1..df239c2 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -14,7 +14,7 @@ smp_store_release(X,V) { __store{release}(*X,V); } smp_load_acquire(X) __load{acquire}(*X) rcu_assign_pointer(X,V) { __store{release}(X,V); } rcu_dereference(X) __load{once}(X) -smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; } +smp_store_mb(X,V) { __store{seq-cst}(X,V); }
// Fences smp_mb() { __fence{mb}; } @@ -27,11 +27,11 @@ smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; } barrier() { __fence{barrier}; }
// Exchange -xchg(X,V) __xchg{mb}(X,V) +xchg(X,V) __xchg{seq-cst}(X,V) xchg_relaxed(X,V) __xchg{once}(X,V) xchg_release(X,V) __xchg{release}(X,V) xchg_acquire(X,V) __xchg{acquire}(X,V) -cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W) +cmpxchg(X,V,W) __cmpxchg{seq-cst}(X,V,W) cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W) cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) @@ -65,52 +65,52 @@ atomic_sub(V,X) { __atomic_op(X,-,V); } atomic_inc(X) { __atomic_op(X,+,1); } atomic_dec(X) { __atomic_op(X,-,1); }
-atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) +atomic_add_return(V,X) __atomic_op_return{seq-cst}(X,+,V) atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V) atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V) -atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V) +atomic_fetch_add(V,X) __atomic_fetch_op{seq-cst}(X,+,V) atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V) atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V) atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)
-atomic_inc_return(X) __atomic_op_return{mb}(X,+,1) +atomic_inc_return(X) __atomic_op_return{seq-cst}(X,+,1) atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1) atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1) atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1) -atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1) +atomic_fetch_inc(X) __atomic_fetch_op{seq-cst}(X,+,1) atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1) atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1) atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)
-atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V) +atomic_sub_return(V,X) __atomic_op_return{seq-cst}(X,-,V) atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V) atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V) atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V) -atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V) +atomic_fetch_sub(V,X) __atomic_fetch_op{seq-cst}(X,-,V) atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V) atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V) atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)
-atomic_dec_return(X) __atomic_op_return{mb}(X,-,1) +atomic_dec_return(X) __atomic_op_return{seq-cst}(X,-,1) atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1) atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1) atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1) -atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1) +atomic_fetch_dec(X) __atomic_fetch_op{seq-cst}(X,-,1) atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1) atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1) atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)
-atomic_xchg(X,V) __xchg{mb}(X,V) +atomic_xchg(X,V) __xchg{seq-cst}(X,V) atomic_xchg_relaxed(X,V) __xchg{once}(X,V) atomic_xchg_release(X,V) __xchg{release}(X,V) atomic_xchg_acquire(X,V) __xchg{acquire}(X,V) -atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W) +atomic_cmpxchg(X,V,W) __cmpxchg{seq-cst}(X,V,W) atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W) atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
-atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0 -atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0 -atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0 -atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0 +atomic_sub_and_test(V,X) __atomic_op_return{seq-cst}(X,-,V) == 0 +atomic_dec_and_test(X) __atomic_op_return{seq-cst}(X,-,1) == 0 +atomic_inc_and_test(X) __atomic_op_return{seq-cst}(X,+,1) == 0 +atomic_add_negative(V,X) __atomic_op_return{seq-cst}(X,+,V) < 0