hulk inclusion
category: bugfix
bugzilla: NA
CVE: NA
----------------------------------------
This reverts commit 99294378e022ec2785f88247a7373767fd090e3a.
288e4521f0f6 ("x86/asm: 'Simplify' GEN_*_RMWcc() macros") needs to be applied before this commit to avoid a compile error.
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 Documentation/core-api/refcount-vs-atomic.rst | 24 +++----------------
 arch/x86/include/asm/refcount.h               | 18 ++------------
 lib/refcount.c                                | 18 ++++----------
 3 files changed, 10 insertions(+), 50 deletions(-)
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 976e85adffe8..322851bada16 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -54,13 +54,6 @@ must propagate to all other CPUs before the release operation
 (A-cumulative property). This is implemented using
 :c:func:`smp_store_release`.
 
-An ACQUIRE memory ordering guarantees that all post loads and
-stores (all po-later instructions) on the same CPU are
-completed after the acquire operation. It also guarantees that all
-po-later stores on the same CPU must propagate to all other CPUs
-after the acquire operation executes. This is implemented using
-:c:func:`smp_acquire__after_ctrl_dep`.
-
 A control dependency (on success) for refcounters guarantees that if a
 reference for an object was successfully obtained (reference counter
 increment or addition happened, function returned true),
@@ -126,24 +119,13 @@ Memory ordering guarantees changes:
           result of obtaining pointer to the object!
 
 
-case 5) - generic dec/sub decrement-based RMW ops that return a value
----------------------------------------------------------------------
+case 5) - decrement-based RMW ops that return a value
+-----------------------------------------------------
 
 Function changes:
 
  * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
  * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
-
-Memory ordering guarantees changes:
-
- * fully ordered --> RELEASE ordering + ACQUIRE ordering on success
-
-
-case 6) other decrement-based RMW ops that return a value
-----------------------------------------------------------
-
-Function changes:
-
  * no atomic counterpart --> :c:func:`refcount_dec_if_one`
  * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
 
@@ -154,7 +136,7 @@ Memory ordering guarantees changes:
 .. note:: :c:func:`atomic_add_unless` only provides full order on success.
 
 
-case 7) - lock-based RMW
+case 6) - lock-based RMW
 ------------------------
 
 Function changes:
 
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 88061df7add6..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,28 +67,14 @@ static __always_inline void refcount_dec(refcount_t *r)
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-	bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
+	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
 				  r->refs.counter, "er", i, "%0", e, "cx");
-
-	if (ret) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	return false;
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
-	bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
+	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
 				 r->refs.counter, "%0", e, "cx");
-
-	if (ret) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	return false;
 }
 
 static __always_inline __must_check
diff --git a/lib/refcount.c b/lib/refcount.c
index 6e904af0fb3e..ebcf8cd49e05 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -33,9 +33,6 @@
  * Note that the allocator is responsible for ordering things between free()
  * and alloc().
  *
- * The decrements dec_and_test() and sub_and_test() also provide acquire
- * ordering on success.
- *
  */
 
 #include <linux/mutex.h>
@@ -167,8 +164,8 @@ EXPORT_SYMBOL(refcount_inc_checked);
  * at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides an acquire ordering on success such that free()
- * must come after.
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
  *
  * Use of this function is not recommended for the normal reference counting
  * use case in which references are taken and released one at a time. In these
@@ -193,12 +190,7 @@ bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 
 	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
 
-	if (!new) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-	return false;
-
+	return !new;
 }
 EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
@@ -210,8 +202,8 @@ EXPORT_SYMBOL(refcount_sub_and_test_checked);
  * decrement when saturated at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides an acquire ordering on success such that free()
- * must come after.
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
From: Peter Zijlstra <peterz@infradead.org>
mainline inclusion
from mainline-v4.20-rc1
commit 288e4521f0f6717909933116563e66bb894ae2af
category: bugfix
bugzilla: NA
CVE: NA

---------------------------
Currently the GEN_*_RMWcc() macros include a return statement, which pretty much mandates we directly wrap them in an (inline) function.
Macros with return statements are tricky and, as per the above, limit use, so remove the return statement and make them statement-expressions. This allows them to be used more widely.
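To illustrate the difference, here is a minimal user-space sketch (made-up macro names, not the kernel ones): the old form only works as the tail of a wrapper function, while the statement-expression form can be used wherever an expression can.

  /* Sketch: a return-statement macro vs. a statement-expression macro. */
  #include <stdbool.h>
  #include <stdio.h>

  /* Old style: the body returns, so the macro can only sit at the end of
   * a bool-returning wrapper function. */
  #define IS_POSITIVE_OLD(x)              \
  do {                                    \
          if ((x) > 0)                    \
                  return true;            \
          return false;                   \
  } while (0)

  /* New style: a GNU C statement expression yields a value, so the caller
   * can use the macro like any other expression. */
  #define IS_POSITIVE_NEW(x)              \
  ({                                      \
          bool ret = ((x) > 0);           \
          ret;                            \
  })

  static bool is_positive_old(int x)
  {
          IS_POSITIVE_OLD(x);     /* forced to live inside a wrapper */
  }

  int main(void)
  {
          if (is_positive_old(1) && IS_POSITIVE_NEW(2))
                  printf("both positive\n");
          return 0;
  }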
Also, shuffle the arguments a bit. Placing the @cc argument 3rd makes it consistent between UNARY and BINARY, but more importantly, it makes the @arg0 argument last.
Since the @arg0 argument is now last, we can do CPP trickery and make it an optional argument, simplifying the users; 17 out of 18 occurrences do not need this argument.
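The optional-argument trick is plain preprocessor argument counting. A stand-alone sketch with illustrative names only (the kernel's equivalents are the RMWcc_ARGS() and RMWcc_CONCAT() helpers added below):

  #include <stdio.h>

  /* Count up to four arguments; ", ##__VA_ARGS__" (a GNU extension, as
   * used by the kernel) also makes the zero-argument case work. */
  #define COUNT_ARGS_(_0, _1, _2, _3, _4, n, ...) n
  #define COUNT_ARGS(...) COUNT_ARGS_(, ##__VA_ARGS__, 4, 3, 2, 1, 0)

  #define CONCAT_(a, b) a ## b
  #define CONCAT(a, b)  CONCAT_(a, b)

  /* Two concrete variants: one takes an explicit trailing operand string,
   * the other supplies a default. */
  #define EMIT_2(op, var)       printf("%s %s\n", op, var)
  #define EMIT_3(op, var, arg)  printf("%s %s (%s)\n", op, var, arg)

  /* Dispatch on how many arguments were actually passed. */
  #define EMIT(...) CONCAT(EMIT_, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)

  int main(void)
  {
          EMIT("decl", "counter");            /* expands to EMIT_2() */
          EMIT("subl", "counter", "%[val]");  /* expands to EMIT_3() */
          return 0;
  }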
Finally, change to asm symbolic names, instead of the numeric ordering of operands, which allows us to get rid of __BINARY_RMWcc_ARG and get cleaner code overall.
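For the symbolic-name part, a simplified x86 sketch (operands reduced to a bare memory increment, not the kernel's real constraints) of what the change buys:

  /* Sketch: numeric vs. symbolic inline-asm operand references (x86 only). */
  static inline void inc_numeric(int *p)
  {
          /* "%0" names the operand by position; reordering the operand
           * list silently changes what it refers to. */
          asm volatile("incl %0" : "+m" (*p));
  }

  static inline void inc_symbolic(int *p)
  {
          /* "%[var]" names the operand symbolically, so the template stays
           * correct wherever [var] ends up in the operand list. */
          asm volatile("incl %[var]" : [var] "+m" (*p));
  }

  int main(void)
  {
          int x = 0;

          inc_numeric(&x);
          inc_symbolic(&x);
          return x - 2;   /* exits 0 if both increments happened */
  }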
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: JBeulich@suse.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: hpa@linux.intel.com
Link: https://lkml.kernel.org/r/20181003130957.108960094@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Conflicts:
	arch/x86/include/asm/refcount.h
[hanjun: update arch/x86/include/asm/qspinlock.h as the change of
 GEN_BINARY_RMWcc interface, and update arch/x86/include/asm/refcount.h
 to change 'counter' to 'var']
Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/x86/include/asm/atomic.h      |  8 ++--
 arch/x86/include/asm/atomic64_64.h |  8 ++--
 arch/x86/include/asm/bitops.h      |  9 ++--
 arch/x86/include/asm/local.h       |  8 ++--
 arch/x86/include/asm/preempt.h     |  2 +-
 arch/x86/include/asm/qspinlock.h   |  4 +-
 arch/x86/include/asm/refcount.h    | 20 +++++----
 arch/x86/include/asm/rmwcc.h       | 69 ++++++++++++++++++------------
 8 files changed, 70 insertions(+), 58 deletions(-)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index d266a4066289..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -82,7 +82,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
  */
 static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
 }
 #define arch_atomic_sub_and_test arch_atomic_sub_and_test
 
@@ -122,7 +122,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
  */
 static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
+	return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
 }
 #define arch_atomic_dec_and_test arch_atomic_dec_and_test
 
@@ -136,7 +136,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
  */
 static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
+	return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
 }
 #define arch_atomic_inc_and_test arch_atomic_inc_and_test
 
@@ -151,7 +151,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
  */
 static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
 }
 #define arch_atomic_add_negative arch_atomic_add_negative
 
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 55ca027f8c1c..5e86c0d68ac1 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -73,7 +73,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  */
 static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
 }
 #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 
@@ -115,7 +115,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
  */
 static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
+	return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
 }
 #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 
@@ -129,7 +129,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
  */
 static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
+	return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
 }
 #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 
@@ -144,7 +144,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  */
 static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
 }
 #define arch_atomic64_add_negative arch_atomic64_add_negative
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 33611a74bfff..cb8a7386933f 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -206,8 +206,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
-			 *addr, "Ir", nr, "%0", c);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 
 /**
@@ -253,8 +252,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
  */
 static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
-			 *addr, "Ir", nr, "%0", c);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 
 /**
@@ -307,8 +305,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
  */
 static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
-			 *addr, "Ir", nr, "%0", c);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
 static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index c91083c59845..349a47acaa4a 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -53,7 +53,7 @@ static inline void local_sub(long i, local_t *l)
  */
 static inline bool local_sub_and_test(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
+	return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i);
 }
 
 /**
@@ -66,7 +66,7 @@ static inline bool local_sub_and_test(long i, local_t *l)
  */
 static inline bool local_dec_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
+	return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e);
 }
 
 /**
@@ -79,7 +79,7 @@ static inline bool local_dec_and_test(local_t *l)
  */
 static inline bool local_inc_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
+	return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e);
 }
 
 /**
@@ -93,7 +93,7 @@ static inline bool local_inc_and_test(local_t *l)
  */
 static inline bool local_add_negative(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
+	return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i);
 }
 /**
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7f2dbd91fc74..90cb2f36c042 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -88,7 +88,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
+	return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
 }
 
 /*
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 47aafe9897e8..e34ffb0af6bd 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -14,8 +14,8 @@
 
 static __always_inline bool __queued_RMW_btsl(struct qspinlock *lock)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter,
-			 "I", _Q_PENDING_OFFSET, "%0", c);
+	return GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter,
+				c, "I", _Q_PENDING_OFFSET);
 }
 
 static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 19b90521954c..dbaed55c1c24 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -17,7 +17,7 @@
  */
 #define _REFCOUNT_EXCEPTION				\
 	".pushsection .text..refcount\n"		\
-	"111:\tlea %[counter], %%" _ASM_CX "\n"		\
+	"111:\tlea %[var], %%" _ASM_CX "\n"		\
 	"112:\t" ASM_UD2 "\n"				\
 	ASM_UNREACHABLE					\
 	".popsection\n"					\
@@ -43,7 +43,7 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
 		REFCOUNT_CHECK_LT_ZERO
-		: [counter] "+m" (r->refs.counter)
+		: [var] "+m" (r->refs.counter)
 		: "ir" (i)
 		: "cc", "cx");
 }
@@ -52,7 +52,7 @@ static __always_inline void refcount_inc(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "incl %0\n\t"
 		REFCOUNT_CHECK_LT_ZERO
-		: [counter] "+m" (r->refs.counter)
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
@@ -60,21 +60,23 @@ static __always_inline void refcount_dec(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "decl %0\n\t"
 		REFCOUNT_CHECK_LE_ZERO
-		: [counter] "+m" (r->refs.counter)
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-				  r->refs.counter, "er", i, "%0", e, "cx");
+	return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+					 REFCOUNT_CHECK_LT_ZERO,
+					 r->refs.counter, e, "er", i, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
-	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-				 r->refs.counter, "%0", e, "cx");
+	return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+					REFCOUNT_CHECK_LT_ZERO,
+					r->refs.counter, e, "cx");
 }
 
 static __always_inline __must_check
@@ -92,7 +94,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 		/* Did we try to increment from/to an undesirable state? */
 		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
 			asm volatile(REFCOUNT_ERROR
-				     : : [counter] "m" (r->refs.counter)
+				     : : [var] "m" (r->refs.counter)
 				     : "cc", "cx");
 			break;
 		}
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 033dc7ca49e9..8a9eba191516 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,56 +2,69 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __RMWcc_CONCAT(a, b) a ## b
+#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
+
 #define __CLOBBERS_MEM(clb...)	"memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO)
 
 /* Use asm goto */
 
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)			\
-do {									\
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...)			\
+({									\
+	bool c = false;							\
 	asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"		\
-			: : [counter] "m" (var), ## __VA_ARGS__		\
+			: : [var] "m" (_var), ## __VA_ARGS__		\
 			: clobbers : cc_label);				\
-	return 0;							\
-cc_label:								\
-	return 1;							\
-} while (0)
-
-#define __BINARY_RMWcc_ARG	" %1, "
-
+	if (0) {							\
+cc_label:	c = true;						\
+	}								\
+	c;								\
+})
 
 #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
 
 /* Use flags output or a set instruction */
 
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)			\
-do {									\
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...)			\
+({									\
 	bool c;								\
	asm volatile (fullop CC_SET(cc)					\
-			: [counter] "+m" (var), CC_OUT(cc) (c)		\
+			: [var] "+m" (_var), CC_OUT(cc) (c)		\
 			: __VA_ARGS__ : clobbers);			\
-	return c;							\
-} while (0)
-
-#define __BINARY_RMWcc_ARG	" %2, "
+	c;								\
+})
 
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+#define GEN_UNARY_RMWcc_4(op, var, cc, arg0)				\
 	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
-	__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,			\
-		    __CLOBBERS_MEM(clobbers))
+#define GEN_UNARY_RMWcc_3(op, var, cc)					\
+	GEN_UNARY_RMWcc_4(op, var, cc, "%[var]")
 
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,		\
-		    __CLOBBERS_MEM(), vcon (val))
+#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0)		\
+	__GEN_RMWcc(op " %[val], " arg0, var, cc,			\
+		    __CLOBBERS_MEM(), [val] vcon (_val))
+
+#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val)			\
+	GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
+
+#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...)	\
+	__GEN_RMWcc(op " %[var]\n\t" suffix, var, cc,			\
+		    __CLOBBERS_MEM(clobbers))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,	\
-				  clobbers...)				\
-	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,	\
-		    __CLOBBERS_MEM(clobbers), vcon (val))
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, cc, vcon, _val, clobbers...)\
+	__GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc,		\
+		    __CLOBBERS_MEM(clobbers), [val] vcon (_val))
 
#endif /* _ASM_X86_RMWcc */
From: Elena Reshetova <elena.reshetova@intel.com>
mainline inclusion
from mainline-5.1-rc1
commit 47b8f3ab9c49daa824af848f9e02889662d8638f
category: feature
bugzilla: NA
CVE: NA

---------------------------
This adds an smp_acquire__after_ctrl_dep() barrier on successful decrease of refcounter value from 1 to 0 for refcount_dec(sub)_and_test variants and therefore gives stronger memory ordering guarantees than prior versions of these functions.
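As a sketch of what this means for callers (struct foo and foo_put() here are hypothetical, for illustration only):

  #include <linux/refcount.h>
  #include <linux/slab.h>

  struct foo {
          refcount_t ref;
          int state;      /* written by users while they hold a reference */
  };

  static void foo_put(struct foo *f)
  {
          /* The RELEASE ordering of the decrement makes this CPU's earlier
           * stores to *f visible before the count can reach zero; the
           * ACQUIRE ordering on success added by this patch ensures the
           * kfree() (and any later reuse of the memory) cannot be reordered
           * before the decrement that observed zero. */
          if (refcount_dec_and_test(&f->ref))
                  kfree(f);
  }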
Co-developed-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: dvyukov@google.com
Cc: keescook@chromium.org
Cc: stern@rowland.harvard.edu
Link: https://lkml.kernel.org/r/1548847131-27854-2-git-send-email-elena.reshetova@...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 Documentation/core-api/refcount-vs-atomic.rst | 24 ++++++++++++++++---
 arch/x86/include/asm/refcount.h               | 22 +++++++++++++----
 lib/refcount.c                                | 18 ++++++++++----
 3 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 322851bada16..976e85adffe8 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -54,6 +54,13 @@ must propagate to all other CPUs before the release operation
 (A-cumulative property). This is implemented using
 :c:func:`smp_store_release`.
 
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+
 A control dependency (on success) for refcounters guarantees that if a
 reference for an object was successfully obtained (reference counter
 increment or addition happened, function returned true),
@@ -119,13 +126,24 @@ Memory ordering guarantees changes:
           result of obtaining pointer to the object!
 
 
-case 5) - decrement-based RMW ops that return a value
------------------------------------------------------
+case 5) - generic dec/sub decrement-based RMW ops that return a value
+---------------------------------------------------------------------
 
 Function changes:
 
  * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
  * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> RELEASE ordering + ACQUIRE ordering on success
+
+
+case 6) other decrement-based RMW ops that return a value
+----------------------------------------------------------
+
+Function changes:
+
  * no atomic counterpart --> :c:func:`refcount_dec_if_one`
  * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
 
@@ -136,7 +154,7 @@ Memory ordering guarantees changes:
 .. note:: :c:func:`atomic_add_unless` only provides full order on success.
 
 
-case 6) - lock-based RMW
+case 7) - lock-based RMW
 ------------------------
 
 Function changes:
 
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index dbaed55c1c24..232f856e0db0 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,16 +67,30 @@ static __always_inline void refcount_dec(refcount_t *r)
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-	return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+	bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
 					 REFCOUNT_CHECK_LT_ZERO,
 					 r->refs.counter, e, "er", i, "cx");
+
+	if (ret) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	return false;
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
-	return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
-					REFCOUNT_CHECK_LT_ZERO,
-					r->refs.counter, e, "cx");
+	bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+					    REFCOUNT_CHECK_LT_ZERO,
+					    r->refs.counter, e, "cx");
+
+	if (ret) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	return false;
 }
 
 static __always_inline __must_check
diff --git a/lib/refcount.c b/lib/refcount.c
index ebcf8cd49e05..6e904af0fb3e 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -33,6 +33,9 @@
  * Note that the allocator is responsible for ordering things between free()
  * and alloc().
  *
+ * The decrements dec_and_test() and sub_and_test() also provide acquire
+ * ordering on success.
+ *
  */
 
 #include <linux/mutex.h>
@@ -164,8 +167,8 @@ EXPORT_SYMBOL(refcount_inc_checked);
  * at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
  *
  * Use of this function is not recommended for the normal reference counting
  * use case in which references are taken and released one at a time. In these
@@ -190,7 +193,12 @@ bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 
 	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
 
-	return !new;
+	if (!new) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+	return false;
+
 }
 EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
@@ -202,8 +210,8 @@ EXPORT_SYMBOL(refcount_sub_and_test_checked);
  * decrement when saturated at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */