LoongArch: Relax memory ordering for atomic operations

This patch relaxes the implementation while satisfying the memory ordering
requirements for atomic operations, which will help improve performance on
LA664+.

Unixbench with full threads (8)
                                           before       after
  Dhrystone 2 using register variables   203910714.2  203909539.8   0.00%
  Double-Precision Whetstone                 37930.9        37931   0.00%
  Execl Throughput                           29431.5      29545.8   0.39%
  File Copy 1024 bufsize 2000 maxblocks    6645759.5      6676320   0.46%
  File Copy 256 bufsize 500 maxblocks      2138772.4    2144182.4   0.25%
  File Copy 4096 bufsize 8000 maxblocks   11640698.4     11602703  -0.33%
  Pipe Throughput                          8849077.7    8917009.4   0.77%
  Pipe-based Context Switching             1255108.5    1287277.3   2.56%
  Process Creation                           50825.9      50442.1  -0.76%
  Shell Scripts (1 concurrent)               25795.8      25942.3   0.57%
  Shell Scripts (8 concurrent)                3812.6       3835.2   0.59%
  System Call Overhead                     9248212.6    9353348.6   1.14%
                                                                  =======
  System Benchmarks Index Score               8076.6       8114.4   0.47%

Signed-off-by: WANG Rui <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
WANG Rui 2023-11-08 14:12:15 +08:00 committed by Huacai Chen
parent 71945968d8
commit affef66b65

View File

@ -36,19 +36,19 @@
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__asm__ __volatile__( \
"am"#asm_op"_db.w" " $zero, %1, %0 \n" \
"am"#asm_op".w" " $zero, %1, %0 \n" \
: "+ZB" (v->counter) \
: "r" (I) \
: "memory"); \
}
#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \
{ \
int result; \
\
__asm__ __volatile__( \
"am"#asm_op"_db.w" " %1, %2, %0 \n" \
"am"#asm_op#mb".w" " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
return result c_op I; \
}
#define ATOMIC_FETCH_OP(op, I, asm_op) \
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \
static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \
{ \
int result; \
\
__asm__ __volatile__( \
"am"#asm_op"_db.w" " %1, %2, %0 \n" \
"am"#asm_op#mb".w" " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
#define ATOMIC_OPS(op, I, asm_op, c_op) \
ATOMIC_OP(op, I, asm_op) \
ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
ATOMIC_FETCH_OP(op, I, asm_op)
ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \
ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC_OPS(add, i, add, +)
ATOMIC_OPS(sub, -i, add, +)
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_add_return_acquire arch_atomic_add_return
#define arch_atomic_add_return_release arch_atomic_add_return
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_sub_return_acquire arch_atomic_sub_return
#define arch_atomic_sub_return_release arch_atomic_sub_return
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
#define arch_atomic_fetch_add_release arch_atomic_fetch_add
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, I, asm_op) \
ATOMIC_OP(op, I, asm_op) \
ATOMIC_FETCH_OP(op, I, asm_op)
ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC_OPS(and, i, and)
ATOMIC_OPS(or, i, or)
ATOMIC_OPS(xor, i, xor)
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
#define arch_atomic_fetch_and_release arch_atomic_fetch_and
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
#define arch_atomic_fetch_or_release arch_atomic_fetch_or
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
static inline void arch_atomic64_##op(long i, atomic64_t *v) \
{ \
__asm__ __volatile__( \
"am"#asm_op"_db.d " " $zero, %1, %0 \n" \
"am"#asm_op".d " " $zero, %1, %0 \n" \
: "+ZB" (v->counter) \
: "r" (I) \
: "memory"); \
}
#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \
#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \
{ \
long result; \
__asm__ __volatile__( \
"am"#asm_op"_db.d " " %1, %2, %0 \n" \
"am"#asm_op#mb".d " " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \
return result c_op I; \
}
#define ATOMIC64_FETCH_OP(op, I, asm_op) \
static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \
static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \
{ \
long result; \
\
__asm__ __volatile__( \
"am"#asm_op"_db.d " " %1, %2, %0 \n" \
"am"#asm_op#mb".d " " %1, %2, %0 \n" \
: "+ZB" (v->counter), "=&r" (result) \
: "r" (I) \
: "memory"); \
@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
#define ATOMIC64_OPS(op, I, asm_op, c_op) \
ATOMIC64_OP(op, I, asm_op) \
ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
ATOMIC64_FETCH_OP(op, I, asm_op)
ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \
ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC64_OPS(add, i, add, +)
ATOMIC64_OPS(sub, -i, add, +)
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_add_return_acquire arch_atomic64_add_return
#define arch_atomic64_add_return_release arch_atomic64_add_return
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
#define arch_atomic64_sub_return_release arch_atomic64_sub_return
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, I, asm_op) \
ATOMIC64_OP(op, I, asm_op) \
ATOMIC64_FETCH_OP(op, I, asm_op)
ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
ATOMIC64_OPS(and, i, and)
ATOMIC64_OPS(or, i, or)
ATOMIC64_OPS(xor, i, xor)
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS