Merge branch 'lkmm-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into locking/urgent

Pull the "Linux kernel memory model" tooling implementation from Paul E. McKenney:

 "This pull request contains a single commit that adds a memory model to
  the tools directory. This memory model can (roughly speaking) be thought
  of as an automated version of memory-barriers.txt. It is written in the
  "cat" language, which is executable by the externally provided "herd7"
  simulator, which exhaustively explores the state space of small litmus
  tests."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
commit 7246a966b6

tools/memory-model/Documentation/cheatsheet.txt (new file, 30 lines)

                                 Prior Operation     Subsequent Operation
                                 ---------------     ---------------------------
                              C  Self R W RWM        Self R W DR DW RMW SV
                              __ ---- - - ---        ---- - - -- -- --- --

Store, e.g., WRITE_ONCE()        Y Y
Load, e.g., READ_ONCE()          Y Y Y
Unsuccessful RMW operation       Y Y Y
smp_read_barrier_depends()       Y Y Y
*_dereference()                  Y Y Y Y
Successful *_acquire()           R Y Y Y Y Y Y
Successful *_release()           C Y Y Y W Y
smp_rmb()                        Y R Y Y R
smp_wmb()                        Y W Y Y W
smp_mb() & synchronize_rcu()     CP Y Y Y Y Y Y Y Y
Successful full non-void RMW     CP Y Y Y Y Y Y Y Y Y Y Y
smp_mb__before_atomic()          CP Y Y Y a a a a Y
smp_mb__after_atomic()           CP a a Y Y Y Y Y


Key:    C:      Ordering is cumulative
        P:      Ordering propagates
        R:      Read, for example, READ_ONCE(), or read portion of RMW
        W:      Write, for example, WRITE_ONCE(), or write portion of RMW
        Y:      Provides ordering
        a:      Provides ordering given intervening RMW atomic operation
        DR:     Dependent read (address dependency)
        DW:     Dependent write (address, data, or control dependency)
        RMW:    Atomic read-modify-write operation
        SV:     Same-variable access

tools/memory-model/Documentation/explanation.txt (new file, 1840 lines; diff not shown because the file is too large)

tools/memory-model/Documentation/recipes.txt (new file, 570 lines)

This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
v4.13 of the Linux kernel.

The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
and the third section provides a few rules of thumb.


Simple special cases
====================

This section presents two simple special cases, the first being where
there is only one CPU or only one memory location is accessed, and the
second being use of that old concurrency workhorse, locking.


Single CPU or single memory location
------------------------------------

If there is only one CPU on the one hand or only one variable
on the other, the code will execute in order. There are (as
usual) some things to be careful of:

1.  Some aspects of the C language are unordered. For example,
    in the expression "f(x) + g(y)", the order in which f and g are
    called is not defined; the object code is allowed to use either
    order or even to interleave the computations (see the sketch
    following this list).

2.  Compilers are permitted to use the "as-if" rule. That is, a
    compiler can emit whatever code it likes for normal accesses,
    as long as the results of a single-threaded execution appear
    just as if the compiler had followed all the relevant rules.
    To see this, compile with a high level of optimization and run
    the debugger on the resulting binary.

3.  If there is only one variable but multiple CPUs, that variable
    must be properly aligned and all accesses to that variable must
    be full sized. Variables that straddle cachelines or pages void
    your full-ordering warranty, as do undersized accesses that load
    from or store to only part of the variable.

4.  If there are multiple CPUs, accesses to shared variables should
    use READ_ONCE() and WRITE_ONCE() or stronger to prevent load/store
    tearing, load/store fusing, and invented loads and stores; a brief
    sketch follows this list. There are exceptions to this rule,
    including:

    i.  When there is no possibility of a given shared variable
        being updated by some other CPU, for example, while
        holding the update-side lock, reads from that variable
        need not use READ_ONCE().

    ii. When there is no possibility of a given shared variable
        being either read or updated by other CPUs, for example,
        when running during early boot, reads from that variable
        need not use READ_ONCE() and writes to that variable
        need not use WRITE_ONCE().
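
The following sketch is not taken from the kernel; it simply illustrates
items 1 and 4 above using made-up variables and functions (x, y, sum,
f(), g(), need_to_stop, and do_something_quickly()):

        /* Item 1: the order of the f() and g() calls is unspecified. */
        sum = f(x) + g(y);      /* Either call may execute first. */

        /*
         * Item 4: with a plain access, the compiler may "fuse" the loads
         * and keep testing a stale, register-cached value forever.
         */
        while (!need_to_stop)                   /* BUG: may load only once. */
                do_something_quickly();

        /* READ_ONCE() forces a fresh load from memory on each iteration. */
        while (!READ_ONCE(need_to_stop))
                do_something_quickly();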


Locking
-------

Locking is well-known and straightforward, at least if you don't think
about it too hard. And the basic rule is indeed quite simple: Any CPU that
has acquired a given lock sees any changes previously seen or made by any
CPU before it released that same lock. Note that this statement is a bit
stronger than "Any CPU holding a given lock sees all changes made by any
CPU during the time that CPU was holding this same lock". For example,
consider the following pair of code fragments:

        /* See MP+polocks.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                spin_lock(&mylock);
                WRITE_ONCE(y, 1);
                spin_unlock(&mylock);
        }

        void CPU1(void)
        {
                spin_lock(&mylock);
                r0 = READ_ONCE(y);
                spin_unlock(&mylock);
                r1 = READ_ONCE(x);
        }

The basic rule guarantees that if CPU0() acquires mylock before CPU1(),
then both r0 and r1 must be set to the value 1. This also has the
consequence that if the final value of r0 is equal to 1, then the final
value of r1 must also be equal to 1. In contrast, the weaker rule would
say nothing about the final value of r1.

The converse to the basic rule also holds, as illustrated by the
following litmus test:

        /* See MP+porevlocks.litmus. */
        void CPU0(void)
        {
                r0 = READ_ONCE(y);
                spin_lock(&mylock);
                r1 = READ_ONCE(x);
                spin_unlock(&mylock);
        }

        void CPU1(void)
        {
                spin_lock(&mylock);
                WRITE_ONCE(x, 1);
                spin_unlock(&mylock);
                WRITE_ONCE(y, 1);
        }

This converse to the basic rule guarantees that if CPU0() acquires
mylock before CPU1(), then both r0 and r1 must be set to the value 0.
This also has the consequence that if the final value of r1 is equal
to 0, then the final value of r0 must also be equal to 0. In contrast,
the weaker rule would say nothing about the final value of r0.

These examples show only a single pair of CPUs, but the effects of the
locking basic rule extend across multiple acquisitions of a given lock
across multiple CPUs.

However, it is not necessarily the case that accesses ordered by
locking will be seen as ordered by CPUs not holding that lock.
Consider this example:

        /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
        void CPU0(void)
        {
                spin_lock(&mylock);
                WRITE_ONCE(x, 1);
                WRITE_ONCE(y, 1);
                spin_unlock(&mylock);
        }

        void CPU1(void)
        {
                spin_lock(&mylock);
                r0 = READ_ONCE(y);
                WRITE_ONCE(z, 1);
                spin_unlock(&mylock);
        }

        void CPU2(void)
        {
                WRITE_ONCE(z, 2);
                smp_mb();
                r1 = READ_ONCE(x);
        }

Counter-intuitive though it might be, it is quite possible to have
the final value of r0 be 1, the final value of z be 2, and the final
value of r1 be 0. The reason for this surprising outcome is that
CPU2() never acquired the lock, and thus did not benefit from the
lock's ordering properties.

Ordering can be extended to CPUs not holding the lock by careful use
of smp_mb__after_spinlock():

        /* See Z6.0+pooncelock+poonceLock+pombonce.litmus. */
        void CPU0(void)
        {
                spin_lock(&mylock);
                WRITE_ONCE(x, 1);
                WRITE_ONCE(y, 1);
                spin_unlock(&mylock);
        }

        void CPU1(void)
        {
                spin_lock(&mylock);
                smp_mb__after_spinlock();
                r0 = READ_ONCE(y);
                WRITE_ONCE(z, 1);
                spin_unlock(&mylock);
        }

        void CPU2(void)
        {
                WRITE_ONCE(z, 2);
                smp_mb();
                r1 = READ_ONCE(x);
        }

This addition of smp_mb__after_spinlock() strengthens the lock acquisition
sufficiently to rule out the counter-intuitive outcome.


Taking off the training wheels
==============================

This section looks at more complex examples, including message passing,
load buffering, release-acquire chains, and store buffering.
Many classes of litmus tests have abbreviated names, which may be found
here: https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test6.pdf


Message passing (MP)
--------------------

The MP pattern has one CPU execute a pair of stores to a pair of variables
and another CPU execute a pair of loads from this same pair of variables,
but in the opposite order. The goal is to avoid the counter-intuitive
outcome in which the first load sees the value written by the second store
but the second load does not see the value written by the first store.
In the absence of any ordering, this goal may not be met, as can be seen
in the MP+poonceonces.litmus litmus test. This section therefore looks at
a number of ways of meeting this goal.


Release and acquire
~~~~~~~~~~~~~~~~~~~

Use of smp_store_release() and smp_load_acquire() is one way to force
the desired MP ordering. The general approach is shown below:

        /* See MP+pooncerelease+poacquireonce.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                smp_store_release(&y, 1);
        }

        void CPU1(void)
        {
                r0 = smp_load_acquire(&y);
                r1 = READ_ONCE(x);
        }

The smp_store_release() macro orders any prior accesses against the
store, while the smp_load_acquire() macro orders the load against any
subsequent accesses. Therefore, if the final value of r0 is the value 1,
the final value of r1 must also be the value 1.

The init_stack_slab() function in lib/stackdepot.c uses release-acquire
in this way to safely initialize a slab of the stack. Working out
the mutual-exclusion design is left as an exercise for the reader.


Assign and dereference
~~~~~~~~~~~~~~~~~~~~~~

Use of rcu_assign_pointer() and rcu_dereference() is quite similar to the
use of smp_store_release() and smp_load_acquire(), except that both
rcu_assign_pointer() and rcu_dereference() operate on RCU-protected
pointers. The general approach is shown below:

        /* See MP+onceassign+derefonce.litmus. */
        int z;
        int *y = &z;
        int x;

        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                rcu_assign_pointer(y, &x);
        }

        void CPU1(void)
        {
                rcu_read_lock();
                r0 = rcu_dereference(y);
                r1 = READ_ONCE(*r0);
                rcu_read_unlock();
        }

In this example, if the final value of r0 is &x then the final value of
r1 must be 1.

The rcu_assign_pointer() macro has the same ordering properties as does
smp_store_release(), but the rcu_dereference() macro orders the load only
against later accesses that depend on the value loaded. A dependency
is present if the value loaded determines the address of a later access
(address dependency, as shown above), the value written by a later store
(data dependency), or whether or not a later store is executed in the
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
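
The following sketch (not from the kernel sources, and using made-up
variables gp, p, w, y, and z) illustrates which later accesses are and
are not ordered by rcu_dereference():

        p = rcu_dereference(gp);
        r1 = p->a;              /* Address dependency: ordered. */
        WRITE_ONCE(p->b, 1);    /* Address dependency into the store: ordered. */
        WRITE_ONCE(y, r1 + 1);  /* Data dependency into the stored value: ordered. */
        if (r1)                 /* Control dependency: the store below is ordered. */
                WRITE_ONCE(z, 1);
        r2 = READ_ONCE(w);      /* No dependency: NOT ordered by rcu_dereference(). */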

In lib/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.


Write and read memory barriers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is usually better to use smp_store_release() instead of smp_wmb()
and to use smp_load_acquire() instead of smp_rmb(). However, the older
smp_wmb() and smp_rmb() APIs are still heavily used, so it is important
to understand their use cases. The general approach is shown below:

        /* See MP+wmbonceonce+rmbonceonce.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                smp_wmb();
                WRITE_ONCE(y, 1);
        }

        void CPU1(void)
        {
                r0 = READ_ONCE(y);
                smp_rmb();
                r1 = READ_ONCE(x);
        }

The smp_wmb() macro orders prior stores against later stores, and the
smp_rmb() macro orders prior loads against later loads. Therefore, if
the final value of r0 is 1, the final value of r1 must also be 1.

The xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
the following write-side code fragment:

        log->l_curr_block -= log->l_logBBsize;
        ASSERT(log->l_curr_block >= 0);
        smp_wmb();
        log->l_curr_cycle++;

And the xlog_valid_lsn() function in fs/xfs/xfs_log_priv.h contains
the corresponding read-side code fragment:

        cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
        smp_rmb();
        cur_block = ACCESS_ONCE(log->l_curr_block);

Alternatively, consider the following comment in function
perf_output_put_handle() in kernel/events/ring_buffer.c:

 *   kernel                             user
 *
 *   if (LOAD ->data_tail) {            LOAD ->data_head
 *                      (A)             smp_rmb()       (C)
 *      STORE $data                     LOAD $data
 *      smp_wmb()       (B)             smp_mb()        (D)
 *      STORE ->data_head               STORE ->data_tail
 *   }

The B/C pairing is an example of the MP pattern using smp_wmb() on the
write side and smp_rmb() on the read side.

Of course, given that smp_mb() is strictly stronger than either smp_wmb()
or smp_rmb(), any code fragment that would work with smp_rmb() and
smp_wmb() would also work with smp_mb() replacing either or both of the
weaker barriers.


Load buffering (LB)
-------------------

The LB pattern has one CPU load from one variable and then store to a
second, while another CPU loads from the second variable and then stores
to the first. The goal is to avoid the counter-intuitive situation where
each load reads the value written by the other CPU's store. In the
absence of any ordering it is quite possible that this may happen, as
can be seen in the LB+poonceonces.litmus litmus test.

One way of avoiding the counter-intuitive outcome is through the use of a
control dependency paired with a full memory barrier:

        /* See LB+ctrlonceonce+mbonceonce.litmus. */
        void CPU0(void)
        {
                r0 = READ_ONCE(x);
                if (r0)
                        WRITE_ONCE(y, 1);
        }

        void CPU1(void)
        {
                r1 = READ_ONCE(y);
                smp_mb();
                WRITE_ONCE(x, 1);
        }

This pairing of a control dependency in CPU0() with a full memory
barrier in CPU1() prevents r0 and r1 from both ending up equal to 1.

The A/D pairing from the ring-buffer use case shown earlier also
illustrates LB. Here is a repeat of the comment in
perf_output_put_handle() in kernel/events/ring_buffer.c, showing a
control dependency on the kernel side and a full memory barrier on
the user side:

 *   kernel                             user
 *
 *   if (LOAD ->data_tail) {            LOAD ->data_head
 *                      (A)             smp_rmb()       (C)
 *      STORE $data                     LOAD $data
 *      smp_wmb()       (B)             smp_mb()        (D)
 *      STORE ->data_head               STORE ->data_tail
 *   }
 *
 * Where A pairs with D, and B pairs with C.

The kernel's control dependency between the load from ->data_tail
and the store to data combined with the user's full memory barrier
between the load from data and the store to ->data_tail prevents
the counter-intuitive outcome where the kernel overwrites the data
before the user gets done loading it.


Release-acquire chains
----------------------

Release-acquire chains are a low-overhead, flexible, and easy-to-use
method of maintaining order. However, they do have some limitations that
need to be fully understood. Here is an example that maintains order:

        /* See ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                smp_store_release(&y, 1);
        }

        void CPU1(void)
        {
                r0 = smp_load_acquire(&y);
                smp_store_release(&z, 1);
        }

        void CPU2(void)
        {
                r1 = smp_load_acquire(&z);
                r2 = READ_ONCE(x);
        }

In this case, if r0 and r1 both have final values of 1, then r2 must
also have a final value of 1.

The ordering in this example is stronger than it needs to be. For
example, ordering would still be preserved if CPU1()'s smp_load_acquire()
invocation was replaced with READ_ONCE().

It is tempting to assume that CPU0()'s store to x is globally ordered
before CPU1()'s store to z, but this is not the case:

        /* See Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                smp_store_release(&y, 1);
        }

        void CPU1(void)
        {
                r0 = smp_load_acquire(&y);
                smp_store_release(&z, 1);
        }

        void CPU2(void)
        {
                WRITE_ONCE(z, 2);
                smp_mb();
                r1 = READ_ONCE(x);
        }

One might hope that if the final value of r0 is 1 and the final value
of z is 2, then the final value of r1 must also be 1, but it really is
possible for r1 to have the final value of 0. The reason, of course,
is that in this version, CPU2() is not part of the release-acquire chain.
This situation is accounted for in the rules of thumb below.

Despite this limitation, release-acquire chains are low-overhead as
well as simple and powerful, at least as memory-ordering mechanisms go.


Store buffering
---------------

Store buffering can be thought of as upside-down load buffering, so
that one CPU first stores to one variable and then loads from a second,
while another CPU stores to the second variable and then loads from the
first. Preserving order requires nothing less than full barriers:

        /* See SB+mbonceonces.litmus. */
        void CPU0(void)
        {
                WRITE_ONCE(x, 1);
                smp_mb();
                r0 = READ_ONCE(y);
        }

        void CPU1(void)
        {
                WRITE_ONCE(y, 1);
                smp_mb();
                r1 = READ_ONCE(x);
        }

Omitting either smp_mb() will allow both r0 and r1 to have final
values of 0, but providing both full barriers as shown above prevents
this counter-intuitive outcome.

This pattern most famously appears as part of Dekker's locking
algorithm, but it has a much more practical use within the Linux kernel
of ordering wakeups. The following comment taken from waitqueue_active()
in include/linux/wait.h shows the canonical pattern:

 *      CPU0 - waker                    CPU1 - waiter
 *
 *                                      for (;;) {
 *      @cond = true;                     prepare_to_wait(&wq_head, &wait, state);
 *      smp_mb();                         // smp_mb() from set_current_state()
 *      if (waitqueue_active(wq_head))    if (@cond)
 *        wake_up(wq_head);                 break;
 *                                        schedule();
 *                                      }
 *      finish_wait(&wq_head, &wait);

On CPU0, the store is to @cond and the load is in waitqueue_active().
On CPU1, prepare_to_wait() contains both a store to wq_head and a call
to set_current_state(), which contains an smp_mb() barrier; the load is
"if (@cond)". The full barriers prevent the undesirable outcome where
CPU1 puts the waiting task to sleep and CPU0 fails to wake it up.

Note that use of locking can greatly simplify this pattern.


Rules of thumb
==============

There might seem to be no pattern governing what ordering primitives are
needed in which situations, but this is not the case. There is a pattern
based on the relation between the accesses linking successive CPUs in a
given litmus test. There are three types of linkage:

1.  Write-to-read, where the next CPU reads the value that the
    previous CPU wrote. The LB litmus-test patterns contain only
    this type of relation. In formal memory-modeling texts, this
    relation is called "reads-from" and is usually abbreviated "rf".

2.  Read-to-write, where the next CPU overwrites the value that the
    previous CPU read. The SB litmus test contains only this type
    of relation. In formal memory-modeling texts, this relation is
    often called "from-reads" and is sometimes abbreviated "fr".

3.  Write-to-write, where the next CPU overwrites the value written
    by the previous CPU. The Z6.0 litmus test pattern contains a
    write-to-write relation between the last access of CPU1() and
    the first access of CPU2(). In formal memory-modeling texts,
    this relation is often called "coherence order" and is sometimes
    abbreviated "co". In the C++ standard, it is instead called
    "modification order" and often abbreviated "mo".

The strength of memory ordering required for a given litmus test to
avoid a counter-intuitive outcome depends on the types of relations
linking the memory accesses for the outcome in question:

o   If all links are write-to-read links, then the weakest
    possible ordering within each CPU suffices. For example, in
    the LB litmus test, a control dependency was enough to do the
    job.

o   If all but one of the links are write-to-read links, then a
    release-acquire chain suffices. Both the MP and the ISA2
    litmus tests illustrate this case.

o   If more than one of the links are something other than
    write-to-read links, then a full memory barrier is required
    between each successive pair of non-write-to-read links. This
    case is illustrated by the Z6.0 litmus tests, both in the
    locking and in the release-acquire sections.

However, if you find yourself having to stretch these rules of thumb
to fit your situation, you should consider creating a litmus test and
running it on the model.

tools/memory-model/Documentation/references.txt (new file, 107 lines)

This document provides background reading for memory models and related
tools. These documents are aimed at kernel hackers who are interested
in memory models.


Hardware manuals and models
===========================

o       SPARC International Inc. (Ed.). 1994. "The SPARC Architecture
        Reference Manual Version 9". SPARC International Inc.

o       Compaq Computer Corporation (Ed.). 2002. "Alpha Architecture
        Reference Manual". Compaq Computer Corporation.

o       Intel Corporation (Ed.). 2002. "A Formal Specification of Intel
        Itanium Processor Family Memory Ordering". Intel Corporation.

o       Intel Corporation (Ed.). 2002. "Intel 64 and IA-32 Architectures
        Software Developer's Manual". Intel Corporation.

o       Peter Sewell, Susmit Sarkar, Scott Owens, Francesco Zappa Nardelli,
        and Magnus O. Myreen. 2010. "x86-TSO: A Rigorous and Usable
        Programmer's Model for x86 Multiprocessors". Commun. ACM 53, 7
        (July, 2010), 89-97. http://doi.acm.org/10.1145/1785414.1785443

o       IBM Corporation (Ed.). 2009. "Power ISA Version 2.06". IBM
        Corporation.

o       ARM Ltd. (Ed.). 2009. "ARM Barrier Litmus Tests and Cookbook".
        ARM Ltd.

o       Susmit Sarkar, Peter Sewell, Jade Alglave, Luc Maranget, and
        Derek Williams. 2011. "Understanding POWER Multiprocessors". In
        Proceedings of the 32nd ACM SIGPLAN Conference on Programming
        Language Design and Implementation (PLDI '11). ACM, New York,
        NY, USA, 175-186.

o       Susmit Sarkar, Kayvan Memarian, Scott Owens, Mark Batty,
        Peter Sewell, Luc Maranget, Jade Alglave, and Derek Williams.
        2012. "Synchronising C/C++ and POWER". In Proceedings of the 33rd
        ACM SIGPLAN Conference on Programming Language Design and
        Implementation (PLDI '12). ACM, New York, NY, USA, 311-322.

o       ARM Ltd. (Ed.). 2014. "ARM Architecture Reference Manual (ARMv8,
        for ARMv8-A architecture profile)". ARM Ltd.

o       Imagination Technologies, LTD. 2015. "MIPS(R) Architecture
        For Programmers, Volume II-A: The MIPS64(R) Instruction,
        Set Reference Manual". Imagination Technologies,
        LTD. https://imgtec.com/?do-download=4302.

o       Shaked Flur, Kathryn E. Gray, Christopher Pulte, Susmit
        Sarkar, Ali Sezgin, Luc Maranget, Will Deacon, and Peter
        Sewell. 2016. "Modelling the ARMv8 Architecture, Operationally:
        Concurrency and ISA". In Proceedings of the 43rd Annual ACM
        SIGPLAN-SIGACT Symposium on Principles of Programming Languages
        (POPL '16). ACM, New York, NY, USA, 608-621.

o       Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis,
        Luc Maranget, Kathryn E. Gray, Ali Sezgin, Mark Batty, and Peter
        Sewell. 2017. "Mixed-size Concurrency: ARM, POWER, C/C++11,
        and SC". In Proceedings of the 44th ACM SIGPLAN Symposium on
        Principles of Programming Languages (POPL 2017). ACM, New York,
        NY, USA, 429-442.


Linux-kernel memory model
=========================

o       Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney,
        and Jade Alglave. 2017. "A formal model of
        Linux-kernel memory ordering - companion webpage".
        http://moscova.inria.fr/~maranget/cats7/linux/. (2017). [Online;
        accessed 30-January-2017].

o       Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
        Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)".
        Linux Weekly News. https://lwn.net/Articles/718628/

o       Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
        Alan Stern. 2017. "A formal kernel memory-ordering model (part 2)".
        Linux Weekly News. https://lwn.net/Articles/720550/


Memory-model tooling
====================

o       Daniel Jackson. 2002. "Alloy: A Lightweight Object Modelling
        Notation". ACM Trans. Softw. Eng. Methodol. 11, 2 (April 2002),
        256-290. http://doi.acm.org/10.1145/505145.505149

o       Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
        Cats: Modelling, Simulation, Testing, and Data Mining for Weak
        Memory". ACM Trans. Program. Lang. Syst. 36, 2, Article 7 (July
        2014), 7:1-7:74 pages.

o       Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
        semantics of the weak consistency model specification language
        cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531


Memory-model comparisons
========================

o       Paul E. McKenney, Ulrich Weigand, Andrea Parri, and Boqun
        Feng. 2016. "Linux-Kernel Memory Model". (6 June 2016).
        http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0124r2.html.

tools/memory-model/MAINTAINERS (new file, 15 lines)

LINUX KERNEL MEMORY MODEL
M:      Alan Stern <stern@rowland.harvard.edu>
M:      Andrea Parri <parri.andrea@gmail.com>
M:      Will Deacon <will.deacon@arm.com>
M:      Peter Zijlstra <peterz@infradead.org>
M:      Boqun Feng <boqun.feng@gmail.com>
M:      Nicholas Piggin <npiggin@gmail.com>
M:      David Howells <dhowells@redhat.com>
M:      Jade Alglave <j.alglave@ucl.ac.uk>
M:      Luc Maranget <luc.maranget@inria.fr>
M:      "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
L:      linux-kernel@vger.kernel.org
S:      Supported
T:      git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
F:      tools/memory-model/

tools/memory-model/README (new file, 220 lines)

=========================
LINUX KERNEL MEMORY MODEL
=========================

============
INTRODUCTION
============

This directory contains the memory model of the Linux kernel, written
in the "cat" language and executable by the (externally provided)
"herd7" simulator, which exhaustively explores the state space of
small litmus tests.

In addition, the "klitmus7" tool (also externally provided) may be used
to convert a litmus test to a Linux kernel module, which in turn allows
that litmus test to be exercised within the Linux kernel.


============
REQUIREMENTS
============

The "herd7" and "klitmus7" tools must be downloaded separately:

        https://github.com/herd/herdtools7

See "herdtools7/INSTALL.md" for installation instructions.

Alternatively, Abhishek Bhardwaj has kindly provided a Docker image
of these tools at "abhishek40/memory-model". Abhishek suggests the
following commands to install and use this image:

        - Users should install Docker for their distribution.
        - docker run -itd abhishek40/memory-model
        - docker attach <id-emitted-from-the-previous-command>

Gentoo users might wish to make use of Patrick McLean's package:

        https://gitweb.gentoo.org/repo/gentoo.git/tree/dev-util/herdtools7

These packages may not be up-to-date with respect to the GitHub
repository.


==================
BASIC USAGE: HERD7
==================

The memory model is used, in conjunction with "herd7", to exhaustively
explore the state space of small litmus tests.

For example, to run SB+mbonceonces.litmus against the memory model:

        $ herd7 -conf linux-kernel.cfg litmus-tests/SB+mbonceonces.litmus

Here is the corresponding output:

        Test SB+mbonceonces Allowed
        States 3
        0:r0=0; 1:r0=1;
        0:r0=1; 1:r0=0;
        0:r0=1; 1:r0=1;
        No
        Witnesses
        Positive: 0 Negative: 3
        Condition exists (0:r0=0 /\ 1:r0=0)
        Observation SB+mbonceonces Never 0 3
        Time SB+mbonceonces 0.01
        Hash=d66d99523e2cac6b06e66f4c995ebb48

The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause cannot be satisfied.

See "herd7 -help" or "herdtools7/doc/" for more information.


=====================
BASIC USAGE: KLITMUS7
=====================

The "klitmus7" tool converts a litmus test into a Linux kernel module,
which may then be loaded and run.

For example, to run SB+mbonceonces.litmus against hardware:

        $ mkdir mymodules
        $ klitmus7 -o mymodules litmus-tests/SB+mbonceonces.litmus
        $ cd mymodules ; make
        $ sudo sh run.sh

The corresponding output includes:

        Test SB+mbonceonces Allowed
        Histogram (3 states)
        644580  :>0:r0=1; 1:r0=0;
        644328  :>0:r0=0; 1:r0=1;
        711092  :>0:r0=1; 1:r0=1;
        No
        Witnesses
        Positive: 0, Negative: 2000000
        Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated
        Hash=d66d99523e2cac6b06e66f4c995ebb48
        Observation SB+mbonceonces Never 0 2000000
        Time SB+mbonceonces 0.16

The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate
that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.

And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
for more information.


====================
DESCRIPTION OF FILES
====================

Documentation/cheatsheet.txt
        Quick-reference guide to the Linux-kernel memory model.

Documentation/explanation.txt
        Describes the memory model in detail.

Documentation/recipes.txt
        Lists common memory-ordering patterns.

Documentation/references.txt
        Provides background reading.

linux-kernel.bell
        Categorizes the relevant instructions, including memory
        references, memory barriers, atomic read-modify-write operations,
        lock acquisition/release, and RCU operations.

        More formally, this file (1) lists the subtypes of the various
        event types used by the memory model and (2) performs RCU
        read-side critical section nesting analysis.

linux-kernel.cat
        Specifies what reorderings are forbidden by memory references,
        memory barriers, atomic read-modify-write operations, and RCU.

        More formally, this file specifies what executions are forbidden
        by the memory model. Allowed executions are those which
        satisfy the model's "coherence", "atomic", "happens-before",
        "propagation", and "rcu" axioms, which are defined in the file.

linux-kernel.cfg
        Convenience file that gathers the common-case herd7 command-line
        arguments.

linux-kernel.def
        Maps from C-like syntax to herd7's internal litmus-test
        instruction-set architecture.

litmus-tests
        Directory containing a few representative litmus tests, which
        are listed in litmus-tests/README. A great many more litmus
        tests are available at https://github.com/paulmckrcu/litmus.

lock.cat
        Provides a front-end analysis of lock acquisition and release,
        for example, associating a lock acquisition with the preceding
        and following releases and checking for self-deadlock.

        More formally, this file defines a performance-enhanced scheme
        for generation of the possible reads-from and coherence order
        relations on the locking primitives.

README
        This file.


===========
LIMITATIONS
===========

The Linux-kernel memory model has the following limitations:

1.  Compiler optimizations are not modeled. Of course, the use
    of READ_ONCE() and WRITE_ONCE() limits the compiler's ability
    to optimize, but there is Linux-kernel code that uses bare C
    memory accesses. Handling this code is on the to-do list.
    For more information, see Documentation/explanation.txt (in
    particular, the "THE PROGRAM ORDER RELATION: po AND po-loc"
    and "A WARNING" sections).

2.  Multiple access sizes for a single variable are not supported,
    and neither are misaligned or partially overlapping accesses.

3.  Exceptions and interrupts are not modeled. In some cases,
    this limitation can be overcome by modeling the interrupt or
    exception with an additional process.

4.  I/O such as MMIO or DMA is not supported.

5.  Self-modifying code (such as that found in the kernel's
    alternatives mechanism, function tracer, Berkeley Packet Filter
    JIT compiler, and module loader) is not supported.

6.  Complete modeling of all variants of atomic read-modify-write
    operations, locking primitives, and RCU is not provided.
    For example, call_rcu() and rcu_barrier() are not supported.
    However, a substantial amount of support is provided for these
    operations, as shown in the linux-kernel.def file.

The "herd7" tool has some additional limitations of its own, apart from
the memory model:

1.  Non-trivial data structures such as arrays or structures are
    not supported. However, pointers are supported, allowing trivial
    linked lists to be constructed.

2.  Dynamic memory allocation is not supported, although this can
    be worked around in some cases by supplying multiple statically
    allocated variables.

Some of these limitations may be overcome in the future, but others are
more likely to be addressed by incorporating the Linux-kernel memory model
into other tools.

tools/memory-model/linux-kernel.bell (new file, 53 lines)

// SPDX-License-Identifier: GPL-2.0+
(*
 * Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
 * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
 *                    Andrea Parri <parri.andrea@gmail.com>
 *
 * An earlier version of this file appears in the companion webpage for
 * "Frightening small children and disconcerting grown-ups: Concurrency
 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
 * which is to appear in ASPLOS 2018.
 *)

"Linux kernel memory model"

enum Accesses = 'once (*READ_ONCE,WRITE_ONCE,ACCESS_ONCE*) ||
                'release (*smp_store_release*) ||
                'acquire (*smp_load_acquire*) ||
                'noreturn (* R of non-return RMW *)
instructions R[{'once,'acquire,'noreturn}]
instructions W[{'once,'release}]
instructions RMW[{'once,'acquire,'release}]

enum Barriers = 'wmb (*smp_wmb*) ||
                'rmb (*smp_rmb*) ||
                'mb (*smp_mb*) ||
                'rb_dep (*smp_read_barrier_depends*) ||
                'rcu-lock (*rcu_read_lock*) ||
                'rcu-unlock (*rcu_read_unlock*) ||
                'sync-rcu (*synchronize_rcu*) ||
                'before_atomic (*smp_mb__before_atomic*) ||
                'after_atomic (*smp_mb__after_atomic*) ||
                'after_spinlock (*smp_mb__after_spinlock*)
instructions F[Barriers]

(* Compute matching pairs of nested Rcu-lock and Rcu-unlock *)
let matched = let rec
                unmatched-locks = Rcu-lock \ domain(matched)
            and unmatched-unlocks = Rcu-unlock \ range(matched)
            and unmatched = unmatched-locks | unmatched-unlocks
            and unmatched-po = [unmatched] ; po ; [unmatched]
            and unmatched-locks-to-unlocks =
                [unmatched-locks] ; po ; [unmatched-unlocks]
            and matched = matched | (unmatched-locks-to-unlocks \
                (unmatched-po ; unmatched-po))
        in matched

(* Validate nesting *)
flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking
flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking

(* Outermost level of nesting only *)
let crit = matched \ (po^-1 ; matched ; po^-1)

tools/memory-model/linux-kernel.cat (new file, 124 lines)

// SPDX-License-Identifier: GPL-2.0+
(*
 * Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
 * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
 *                    Andrea Parri <parri.andrea@gmail.com>
 *
 * An earlier version of this file appears in the companion webpage for
 * "Frightening small children and disconcerting grown-ups: Concurrency
 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
 * which is to appear in ASPLOS 2018.
 *)

"Linux kernel memory model"

(*
 * File "lock.cat" handles locks and is experimental.
 * It can be replaced by include "cos.cat" for tests that do not use locks.
 *)

include "lock.cat"

(*******************)
(* Basic relations *)
(*******************)

(* Fences *)
let rb-dep = [R] ; fencerel(Rb_dep) ; [R]
let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
        ([M] ; fencerel(Before_atomic) ; [RMW] ; po? ; [M]) |
        ([M] ; po? ; [RMW] ; fencerel(After_atomic) ; [M]) |
        ([M] ; po? ; [LKW] ; fencerel(After_spinlock) ; [M])
let gp = po ; [Sync-rcu] ; po?

let strong-fence = mb | gp

(* Release Acquire *)
let acq-po = [Acquire] ; po ; [M]
let po-rel = [M] ; po ; [Release]
let rfi-rel-acq = [Release] ; rfi ; [Acquire]

(**********************************)
(* Fundamental coherence ordering *)
(**********************************)

(* Sequential Consistency Per Variable *)
let com = rf | co | fr
acyclic po-loc | com as coherence

(* Atomic Read-Modify-Write *)
empty rmw & (fre ; coe) as atomic

(**********************************)
(* Instruction execution ordering *)
(**********************************)

(* Preserved Program Order *)
let dep = addr | data
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
let to-w = rwdep | (overwrite & int)
let rrdep = addr | (dep ; rfi)
let strong-rrdep = rrdep+ & rb-dep
let to-r = strong-rrdep | rfi-rel-acq
let fence = strong-fence | wmb | po-rel | rmb | acq-po
let ppo = rrdep* ; (to-r | to-w | fence)

(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = rfe? ; r
let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
let prop = (overwrite & ext)? ; cumul-fence* ; rfe?

(*
 * Happens Before: Ordering from the passage of time.
 * No fences needed here for prop because relation confined to one process.
 *)
let hb = ppo | rfe | ((prop \ id) & int)
acyclic hb as happens-before

(****************************************)
(* Write and fence propagation ordering *)
(****************************************)

(* Propagation: Each non-rf link needs a strong fence. *)
let pb = prop ; strong-fence ; hb*
acyclic pb as propagation

(*******)
(* RCU *)
(*******)

(*
 * Effect of read-side critical section proceeds from the rcu_read_lock()
 * onward on the one hand and from the rcu_read_unlock() backwards on the
 * other hand.
 *)
let rscs = po ; crit^-1 ; po?

(*
 * The synchronize_rcu() strong fence is special in that it can order not
 * one but two non-rf relations, but only in conjunction with an RCU
 * read-side critical section.
 *)
let link = hb* ; pb* ; prop

(* Chains that affect the RCU grace-period guarantee *)
let gp-link = gp ; link
let rscs-link = rscs ; link

(*
 * A cycle containing at least as many grace periods as RCU read-side
 * critical sections is forbidden.
 *)
let rec rcu-path =
        gp-link |
        (gp-link ; rscs-link) |
        (rscs-link ; gp-link) |
        (rcu-path ; rcu-path) |
        (gp-link ; rcu-path ; rscs-link) |
        (rscs-link ; rcu-path ; gp-link)

irreflexive rcu-path as rcu

tools/memory-model/linux-kernel.cfg (new file, 21 lines)

macros linux-kernel.def
bell linux-kernel.bell
model linux-kernel.cat
graph columns
squished true
showevents noregs
movelabel true
fontsize 8
xscale 2.0
yscale 1.5
arrowsize 0.8
showinitrf false
showfinalrf false
showinitwrites false
splines spline
pad 0.1
edgeattr hb,color,indigo
edgeattr co,color,blue
edgeattr mb,color,darkgreen
edgeattr wmb,color,darkgreen
edgeattr rmb,color,darkgreen

tools/memory-model/linux-kernel.def (new file, 108 lines)

// SPDX-License-Identifier: GPL-2.0+
//
// An earlier version of this file appears in the companion webpage for
// "Frightening small children and disconcerting grown-ups: Concurrency
// in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
// which is to appear in ASPLOS 2018.

// ONCE
READ_ONCE(X) __load{once}(X)
WRITE_ONCE(X,V) { __store{once}(X,V); }

// Release Acquire and friends
smp_store_release(X,V) { __store{release}(*X,V); }
smp_load_acquire(X) __load{acquire}(*X)
rcu_assign_pointer(X,V) { __store{release}(X,V); }
lockless_dereference(X) __load{lderef}(X)
rcu_dereference(X) __load{deref}(X)

// Fences
smp_mb() { __fence{mb} ; }
smp_rmb() { __fence{rmb} ; }
smp_wmb() { __fence{wmb} ; }
smp_read_barrier_depends() { __fence{rb_dep}; }
smp_mb__before_atomic() { __fence{before_atomic} ; }
smp_mb__after_atomic() { __fence{after_atomic} ; }
smp_mb__after_spinlock() { __fence{after_spinlock} ; }

// Exchange
xchg(X,V) __xchg{mb}(X,V)
xchg_relaxed(X,V) __xchg{once}(X,V)
xchg_release(X,V) __xchg{release}(X,V)
xchg_acquire(X,V) __xchg{acquire}(X,V)
cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)

// Spinlocks
spin_lock(X) { __lock(X) ; }
spin_unlock(X) { __unlock(X) ; }
spin_trylock(X) __trylock(X)

// RCU
rcu_read_lock() { __fence{rcu-lock}; }
rcu_read_unlock() { __fence{rcu-unlock};}
synchronize_rcu() { __fence{sync-rcu}; }
synchronize_rcu_expedited() { __fence{sync-rcu}; }

// Atomic
atomic_read(X) READ_ONCE(*X)
atomic_set(X,V) { WRITE_ONCE(*X,V) ; }
atomic_read_acquire(X) smp_load_acquire(X)
atomic_set_release(X,V) { smp_store_release(X,V); }

atomic_add(V,X) { __atomic_op(X,+,V) ; }
atomic_sub(V,X) { __atomic_op(X,-,V) ; }
atomic_inc(X) { __atomic_op(X,+,1) ; }
atomic_dec(X) { __atomic_op(X,-,1) ; }

atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V)
atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V)
atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V)
atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V)
atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V)
atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)

atomic_inc_return(X) __atomic_op_return{mb}(X,+,1)
atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1)
atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1)
atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1)
atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1)
atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1)
atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1)
atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)

atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V)
atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V)
atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V)
atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V)
atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V)
atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V)
atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V)
atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)

atomic_dec_return(X) __atomic_op_return{mb}(X,-,1)
atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1)
atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1)
atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1)
atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1)
atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1)
atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1)
atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)

atomic_xchg(X,V) __xchg{mb}(X,V)
atomic_xchg_relaxed(X,V) __xchg{once}(X,V)
atomic_xchg_release(X,V) __xchg{release}(X,V)
atomic_xchg_acquire(X,V) __xchg{acquire}(X,V)
atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)

atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0
atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0
atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0
atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0

tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus (new file, 19 lines)

C CoRR+poonceonce+Once

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
}

P1(int *x)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*x);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)

tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus (new file, 18 lines)

C CoRW+poonceonce+Once

{}

P0(int *x)
{
        int r0;

        r0 = READ_ONCE(*x);
        WRITE_ONCE(*x, 1);
}

P1(int *x)
{
        WRITE_ONCE(*x, 2);
}

exists (x=2 /\ 0:r0=2)

tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus (new file, 18 lines)

C CoWR+poonceonce+Once

{}

P0(int *x)
{
        int r0;

        WRITE_ONCE(*x, 1);
        r0 = READ_ONCE(*x);
}

P1(int *x)
{
        WRITE_ONCE(*x, 2);
}

exists (x=1 /\ 0:r0=2)

tools/memory-model/litmus-tests/CoWW+poonceonce.litmus (new file, 11 lines)

C CoWW+poonceonce

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*x, 2);
}

exists (x=1)

tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus (new file, 35 lines)

C IRIW+mbonceonces+OnceOnce

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*x);
        smp_mb();
        r1 = READ_ONCE(*y);
}

P2(int *y)
{
        WRITE_ONCE(*y, 1);
}

P3(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        smp_mb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0 /\ 3:r0=1 /\ 3:r1=0)

tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus (new file, 33 lines)

C IRIW+poonceonces+OnceOnce

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*x);
        r1 = READ_ONCE(*y);
}

P2(int *y)
{
        WRITE_ONCE(*y, 1);
}

P3(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0 /\ 3:r0=1 /\ 3:r1=0)

tools/memory-model/litmus-tests/ISA2+poonceonces.litmus (new file, 28 lines)

C ISA2+poonceonces

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*y, 1);
}

P1(int *y, int *z)
{
        int r0;

        r0 = READ_ONCE(*y);
        WRITE_ONCE(*z, 1);
}

P2(int *x, int *z)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*z);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)

tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus (new file, 28 lines)

C ISA2+pooncerelease+poacquirerelease+poacquireonce

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        smp_store_release(y, 1);
}

P1(int *y, int *z)
{
        int r0;

        r0 = smp_load_acquire(y);
        smp_store_release(z, 1);
}

P2(int *x, int *z)
{
        int r0;
        int r1;

        r0 = smp_load_acquire(z);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)

tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus (new file, 23 lines)

C LB+ctrlonceonce+mbonceonce

{}

P0(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*x);
        if (r0)
                WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*y);
        smp_mb();
        WRITE_ONCE(*x, 1);
}

exists (0:r0=1 /\ 1:r0=1)

tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus (new file, 21 lines)

C LB+poacquireonce+pooncerelease

{}

P0(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*x);
        smp_store_release(y, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = smp_load_acquire(y);
        WRITE_ONCE(*x, 1);
}

exists (0:r0=1 /\ 1:r0=1)

tools/memory-model/litmus-tests/LB+poonceonces.litmus (new file, 21 lines)

C LB+poonceonces

{}

P0(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*x);
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*y);
        WRITE_ONCE(*x, 1);
}

exists (0:r0=1 /\ 1:r0=1)
@@ -0,0 +1,25 @@
C MP+onceassign+derefonce

{
y=z;
z=0;
}

P0(int *x, int **y)
{
        WRITE_ONCE(*x, 1);
        rcu_assign_pointer(*y, x);
}

P1(int *x, int **y)
{
        int *r0;
        int r1;

        rcu_read_lock();
        r0 = rcu_dereference(*y);
        r1 = READ_ONCE(*r0);
        rcu_read_unlock();
}

exists (1:r0=x /\ 1:r1=0)
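For reference, a kernel-style sketch of the publish/subscribe pattern this test models (illustrative only; struct foo, gp, publisher(), and reader() are assumed names, not code from this commit):

/* Illustrative sketch only; the structure and globals are hypothetical. */
struct foo {
        int a;
};
static struct foo foo_instance;
static struct foo __rcu *gp;

void publisher(void)
{
        foo_instance.a = 1;                     /* initialize before publishing */
        rcu_assign_pointer(gp, &foo_instance);  /* orders init before the pointer store */
}

void reader(void)
{
        struct foo *p;

        rcu_read_lock();
        p = rcu_dereference(gp);
        if (p)
                BUG_ON(READ_ONCE(p->a) == 0);   /* forbidden by the litmus test */
        rcu_read_unlock();
}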
24  tools/memory-model/litmus-tests/MP+polocks.litmus  Normal file
@@ -0,0 +1,24 @@
C MP+polocks

{}

P0(int *x, int *y, spinlock_t *mylock)
{
        WRITE_ONCE(*x, 1);
        spin_lock(mylock);
        WRITE_ONCE(*y, 1);
        spin_unlock(mylock);
}

P1(int *x, int *y, spinlock_t *mylock)
{
        int r0;
        int r1;

        spin_lock(mylock);
        r0 = READ_ONCE(*y);
        spin_unlock(mylock);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)
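For reference, a kernel-style sketch of the lock-based hand-off that MP+polocks models (illustrative only; x, y, mylock, writer(), and reader() are assumed names, not code from this commit):

/* Illustrative sketch only; globals and functions are hypothetical. */
static int x, y;
static DEFINE_SPINLOCK(mylock);

void writer(void)
{
        WRITE_ONCE(x, 1);               /* before the critical section */
        spin_lock(&mylock);
        WRITE_ONCE(y, 1);               /* inside the critical section */
        spin_unlock(&mylock);
}

void reader(void)
{
        int r0, r1;

        spin_lock(&mylock);
        r0 = READ_ONCE(y);
        spin_unlock(&mylock);
        r1 = READ_ONCE(x);              /* after the critical section */
        /* If r0 == 1, the model says r1 must also be 1. */
}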
20  tools/memory-model/litmus-tests/MP+poonceonces.litmus  Normal file
@@ -0,0 +1,20 @@
C MP+poonceonces

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)
@@ -0,0 +1,20 @@
C MP+pooncerelease+poacquireonce

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        smp_store_release(y, 1);
}

P1(int *x, int *y)
{
        int r0;
        int r1;

        r0 = smp_load_acquire(y);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)
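For reference, this is the classic message-passing recipe; a kernel-style sketch (illustrative only; msg, msg_ready, producer(), and consumer() are assumed names, not code from this commit) might look like:

/* Illustrative sketch only; globals and functions are hypothetical. */
static int msg;
static int msg_ready;

void producer(void)
{
        WRITE_ONCE(msg, 42);
        smp_store_release(&msg_ready, 1);   /* flag published after the data */
}

void consumer(void)
{
        if (smp_load_acquire(&msg_ready))   /* pairs with the release above */
                BUG_ON(READ_ONCE(msg) != 42);
}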
24  tools/memory-model/litmus-tests/MP+porevlocks.litmus  Normal file
@@ -0,0 +1,24 @@
C MP+porevlocks

{}

P0(int *x, int *y, spinlock_t *mylock)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        spin_lock(mylock);
        r1 = READ_ONCE(*x);
        spin_unlock(mylock);
}

P1(int *x, int *y, spinlock_t *mylock)
{
        spin_lock(mylock);
        WRITE_ONCE(*x, 1);
        spin_unlock(mylock);
        WRITE_ONCE(*y, 1);
}

exists (0:r0=1 /\ 0:r1=0)
@@ -0,0 +1,22 @@
C MP+wmbonceonce+rmbonceonce

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        smp_wmb();
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        smp_rmb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)
21  tools/memory-model/litmus-tests/R+mbonceonces.litmus  Normal file
@@ -0,0 +1,21 @@
C R+mbonceonces

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        smp_mb();
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*y, 2);
        smp_mb();
        r0 = READ_ONCE(*x);
}

exists (y=2 /\ 1:r0=0)
19  tools/memory-model/litmus-tests/R+poonceonces.litmus  Normal file
@@ -0,0 +1,19 @@
C R+poonceonces

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*y, 2);
        r0 = READ_ONCE(*x);
}

exists (y=2 /\ 1:r0=0)
125  tools/memory-model/litmus-tests/README  Normal file
@@ -0,0 +1,125 @@
This directory contains the following litmus tests:

CoRR+poonceonce+Once.litmus
        Test of read-read coherence, that is, whether or not two
        successive reads from the same variable are ordered.

CoRW+poonceonce+Once.litmus
        Test of read-write coherence, that is, whether or not a read
        from a given variable followed by a write to that same variable
        are ordered.

CoWR+poonceonce+Once.litmus
        Test of write-read coherence, that is, whether or not a write
        to a given variable followed by a read from that same variable
        are ordered.

CoWW+poonceonce.litmus
        Test of write-write coherence, that is, whether or not two
        successive writes to the same variable are ordered.

IRIW+mbonceonces+OnceOnce.litmus
        Test of independent reads from independent writes with smp_mb()
        between each pair of reads.  In other words, is smp_mb()
        sufficient to cause two different reading processes to agree on
        the order of a pair of writes, where each write is to a different
        variable by a different process?

IRIW+poonceonces+OnceOnce.litmus
        Test of independent reads from independent writes with nothing
        between each pair of reads.  In other words, is anything at all
        needed to cause two different reading processes to agree on the
        order of a pair of writes, where each write is to a different
        variable by a different process?

ISA2+poonceonces.litmus
        As below, but with store-release replaced with WRITE_ONCE()
        and load-acquire replaced with READ_ONCE().

ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
        Can a release-acquire chain order a prior store against
        a later load?

LB+ctrlonceonce+mbonceonce.litmus
        Does a control dependency and an smp_mb() suffice for the
        load-buffering litmus test, where each process reads from one
        of two variables then writes to the other?

LB+poacquireonce+pooncerelease.litmus
        Does a release-acquire pair suffice for the load-buffering
        litmus test, where each process reads from one of two variables
        then writes to the other?

LB+poonceonces.litmus
        As above, but with store-release replaced with WRITE_ONCE()
        and load-acquire replaced with READ_ONCE().

MP+onceassign+derefonce.litmus
        As below, but with rcu_assign_pointer() and an rcu_dereference().

MP+polocks.litmus
        As below, but with the second access of the writer process
        and the first access of the reader process protected by a lock.

MP+poonceonces.litmus
        As below, but without the smp_rmb() and smp_wmb().

MP+pooncerelease+poacquireonce.litmus
        As below, but with a release-acquire chain.

MP+porevlocks.litmus
        As below, but with the first access of the writer process
        and the second access of the reader process protected by a lock.

MP+wmbonceonce+rmbonceonce.litmus
        Does an smp_wmb() (between the stores) and an smp_rmb() (between
        the loads) suffice for the message-passing litmus test, where one
        process writes data and then a flag, and the other process reads
        the flag and then the data?  (This is similar to the ISA2 tests,
        but with two processes instead of three.)

R+mbonceonces.litmus
        This is the fully ordered (via smp_mb()) version of one of
        the classic counterintuitive litmus tests that illustrates the
        effects of store propagation delays.

R+poonceonces.litmus
        As above, but without the smp_mb() invocations.

SB+mbonceonces.litmus
        This is the fully ordered (again, via smp_mb()) version of store
        buffering, which forms the core of Dekker's mutual-exclusion
        algorithm.

SB+poonceonces.litmus
        As above, but without the smp_mb() invocations.

S+poonceonces.litmus
        As below, but without the smp_wmb() and acquire load.

S+wmbonceonce+poacquireonce.litmus
        Can an smp_wmb(), instead of a release, and an acquire order
        a prior store against a subsequent store?

WRC+poonceonces+Once.litmus
WRC+pooncerelease+rmbonceonce+Once.litmus
        These two are members of an extension of the MP litmus-test class
        in which the first write is moved to a separate process.

Z6.0+pooncelock+pooncelock+pombonce.litmus
        Is the ordering provided by a spin_unlock() and a subsequent
        spin_lock() sufficient to make ordering apparent to accesses
        by a process not holding the lock?

Z6.0+pooncelock+poonceLock+pombonce.litmus
        As above, but with smp_mb__after_spinlock() immediately
        following the spin_lock().

Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus
        Is the ordering provided by a release-acquire chain sufficient
        to make ordering apparent to accesses by a process that does
        not participate in that release-acquire chain?

A great many more litmus tests are available here:

        https://github.com/paulmckrcu/litmus
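Assuming the externally provided herd7 simulator is installed, these tests are intended to be run from the tools/memory-model directory; the invocation below is an example rather than text from this README:

        herd7 -conf linux-kernel.cfg litmus-tests/SB+mbonceonces.litmus

herd7 then reports whether the outcome named in the test's "exists" clause can occur under the model.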
19  tools/memory-model/litmus-tests/S+poonceonces.litmus  Normal file
@@ -0,0 +1,19 @@
C S+poonceonces

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 2);
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*y);
        WRITE_ONCE(*x, 1);
}

exists (x=2 /\ 1:r0=1)
@@ -0,0 +1,20 @@
C S+wmbonceonce+poacquireonce

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 2);
        smp_wmb();
        WRITE_ONCE(*y, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = smp_load_acquire(y);
        WRITE_ONCE(*x, 1);
}

exists (x=2 /\ 1:r0=1)
23  tools/memory-model/litmus-tests/SB+mbonceonces.litmus  Normal file
@@ -0,0 +1,23 @@
C SB+mbonceonces

{}

P0(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*x, 1);
        smp_mb();
        r0 = READ_ONCE(*y);
}

P1(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*y, 1);
        smp_mb();
        r0 = READ_ONCE(*x);
}

exists (0:r0=0 /\ 1:r0=0)
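For reference, a kernel-style sketch of the store-buffering (Dekker-style) pattern that SB+mbonceonces models (illustrative only; flag0, flag1, and the cpu*() functions are assumed names, not code from this commit):

/* Illustrative sketch only; each CPU claims its own flag, then checks the other. */
static int flag0, flag1;

void cpu0(void)
{
        int r0;

        WRITE_ONCE(flag0, 1);
        smp_mb();                       /* order the store before the load */
        r0 = READ_ONCE(flag1);
        /* r0 == 0 means cpu0 may enter the critical section. */
}

void cpu1(void)
{
        int r0;

        WRITE_ONCE(flag1, 1);
        smp_mb();
        r0 = READ_ONCE(flag0);
        /* r0 == 0 means cpu1 may enter the critical section. */
}
/* With the smp_mb() calls, at most one CPU can see the other's flag as 0. */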
21  tools/memory-model/litmus-tests/SB+poonceonces.litmus  Normal file
@@ -0,0 +1,21 @@
C SB+poonceonces

{}

P0(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*x, 1);
        r0 = READ_ONCE(*y);
}

P1(int *x, int *y)
{
        int r0;

        WRITE_ONCE(*y, 1);
        r0 = READ_ONCE(*x);
}

exists (0:r0=0 /\ 1:r0=0)
27  tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus  Normal file
@@ -0,0 +1,27 @@
C WRC+poonceonces+Once

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*x);
        WRITE_ONCE(*y, 1);
}

P2(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
@@ -0,0 +1,28 @@
C WRC+pooncerelease+rmbonceonce+Once

{}

P0(int *x)
{
        WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
        int r0;

        r0 = READ_ONCE(*x);
        smp_store_release(y, 1);
}

P2(int *x, int *y)
{
        int r0;
        int r1;

        r0 = READ_ONCE(*y);
        smp_rmb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
@@ -0,0 +1,33 @@
C Z6.0+pooncelock+poonceLock+pombonce

{}

P0(int *x, int *y, spinlock_t *mylock)
{
        spin_lock(mylock);
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*y, 1);
        spin_unlock(mylock);
}

P1(int *y, int *z, spinlock_t *mylock)
{
        int r0;

        spin_lock(mylock);
        smp_mb__after_spinlock();
        r0 = READ_ONCE(*y);
        WRITE_ONCE(*z, 1);
        spin_unlock(mylock);
}

P2(int *x, int *z)
{
        int r1;

        WRITE_ONCE(*z, 2);
        smp_mb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ z=2 /\ 2:r1=0)
@@ -0,0 +1,32 @@
C Z6.0+pooncelock+pooncelock+pombonce

{}

P0(int *x, int *y, spinlock_t *mylock)
{
        spin_lock(mylock);
        WRITE_ONCE(*x, 1);
        WRITE_ONCE(*y, 1);
        spin_unlock(mylock);
}

P1(int *y, int *z, spinlock_t *mylock)
{
        int r0;

        spin_lock(mylock);
        r0 = READ_ONCE(*y);
        WRITE_ONCE(*z, 1);
        spin_unlock(mylock);
}

P2(int *x, int *z)
{
        int r1;

        WRITE_ONCE(*z, 2);
        smp_mb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ z=2 /\ 2:r1=0)
@@ -0,0 +1,28 @@
C Z6.0+pooncerelease+poacquirerelease+mbonceonce

{}

P0(int *x, int *y)
{
        WRITE_ONCE(*x, 1);
        smp_store_release(y, 1);
}

P1(int *y, int *z)
{
        int r0;

        r0 = smp_load_acquire(y);
        smp_store_release(z, 1);
}

P2(int *x, int *z)
{
        int r1;

        WRITE_ONCE(*z, 2);
        smp_mb();
        r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ z=2 /\ 2:r1=0)
99  tools/memory-model/lock.cat  Normal file
@@ -0,0 +1,99 @@
// SPDX-License-Identifier: GPL-2.0+
(*
 * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
 *)

(* Generate coherence orders and handle lock operations *)

include "cross.cat"

(* From lock reads to their partner lock writes *)
let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
let rmw = rmw | lk-rmw

(*
 * A paired LKR must always see an unlocked value; spin_lock() calls nested
 * inside a critical section (for the same lock) always deadlock.
 *)
empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc)
        as lock-nest

(* The litmus test is invalid if an LKW event is not part of an RMW pair *)
flag ~empty LKW \ range(lk-rmw) as unpaired-LKW

(* This will be allowed if we implement spin_is_locked() *)
flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR

(* There should be no R or W accesses to spinlocks *)
let ALL-LOCKS = LKR | LKW | UL | LF
flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses

(* The final value of a spinlock should not be tested *)
flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final


(*
 * Put lock operations in their appropriate classes, but leave UL out of W
 * until after the co relation has been generated.
 *)
let R = R | LKR | LF
let W = W | LKW

let Release = Release | UL
let Acquire = Acquire | LKR


(* Match LKW events to their corresponding UL events *)
let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)

flag ~empty UL \ range(critical) as unmatched-unlock

(* Allow up to one unmatched LKW per location; more must deadlock *)
let UNMATCHED-LKW = LKW \ domain(critical)
empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks


(* rfi for LF events: link each LKW to the LF events in its critical section *)
let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)

(* rfe for LF events *)
let all-possible-rfe-lf =
        (*
         * Given an LF event r, compute the possible rfe edges for that event
         * (all those starting from LKW events in other threads),
         * and then convert that relation to a set of single-edge relations.
         *)
        let possible-rfe-lf r =
                let pair-to-relation p = p ++ 0
                in map pair-to-relation ((LKW * {r}) & loc & ext)
        (* Do this for each LF event r that isn't in rfi-lf *)
        in map possible-rfe-lf (LF \ range(rfi-lf))

(* Generate all rf relations for LF events *)
with rfe-lf from cross(all-possible-rfe-lf)
let rf = rf | rfi-lf | rfe-lf


(* Generate all co relations, including LKW events but not UL *)
let co0 = co0 | ([IW] ; loc ; [LKW]) |
        (([LKW] ; loc ; [UNMATCHED-LKW]) \ [UNMATCHED-LKW])
include "cos-opt.cat"
let W = W | UL
let M = R | W

(* Merge UL events into co *)
let co = (co | critical | (critical^-1 ; co))+
let coe = co & ext
let coi = co & int

(* Merge LKR events into rf *)
let rf = rf | ([IW | UL] ; singlestep(co) ; lk-rmw^-1)
let rfe = rf & ext
let rfi = rf & int

let fr = rf^-1 ; co
let fre = fr & ext
let fri = fr & int

show co,rf,fr