From e88d62cd4b2f0b1ae55e9008e79c2794b1fc914d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Sep 2017 12:41:52 +0100 Subject: [PATCH 1/3] percpu: make this_cpu_generic_read() atomic w.r.t. interrupts As raw_cpu_generic_read() is a plain read from a raw_cpu_ptr() address, it's possible (albeit unlikely) that the compiler will split the access across multiple instructions. In this_cpu_generic_read() we disable preemption but not interrupts before calling raw_cpu_generic_read(). Thus, an interrupt could be taken in the middle of the split load instructions. If a this_cpu_write() or RMW this_cpu_*() op is made to the same variable in the interrupt handling path, this_cpu_read() will return a torn value. For native word types, we can avoid tearing using READ_ONCE(), but this won't work in all cases (e.g. 64-bit types on most 32-bit platforms). This patch reworks this_cpu_generic_read() to use READ_ONCE() where possible, otherwise falling back to disabling interrupts. Signed-off-by: Mark Rutland Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Peter Zijlstra Cc: Pranith Kumar Cc: Tejun Heo Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 0504ef8f3aa3..976f8ac26665 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -115,15 +115,35 @@ do { \ (__ret); \ }) -#define this_cpu_generic_read(pcp) \ +#define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ typeof(pcp) __ret; \ preempt_disable_notrace(); \ - __ret = raw_cpu_generic_read(pcp); \ + __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ __ret; \ }) +#define __this_cpu_generic_read_noirq(pcp) \ +({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(__flags); \ + __ret; \ +}) + +#define this_cpu_generic_read(pcp) \ +({ \ + typeof(pcp) __ret; \ + if (__native_word(pcp)) \ + __ret = __this_cpu_generic_read_nopreempt(pcp); \ + else \ + __ret = __this_cpu_generic_read_noirq(pcp); \ + __ret; \ +}) + #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ From 2e08d20d777e997bf37806b22b471f98fbe6b693 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 27 Sep 2017 16:34:59 -0500 Subject: [PATCH 2/3] percpu: fix starting offset for chunk statistics traversal This patch fixes the starting offset used when scanning chunks to compute the chunk statistics. The value start_offset (and end_offset) are managed in bytes while the traversal occurs over bits. Thus for the reserved and dynamic chunk, it may incorrectly skip over the initial allocations. Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- mm/percpu-stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 6142484e88f7..7a58460bfd27 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -73,7 +73,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, last_alloc + 1 : 0; as_len = 0; - start = chunk->start_offset; + start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE; /* * If a bit is set in the allocation map, the bound_map identifies From 1fa4df3e688902d033dfda796eb83ae6ad8d0488 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 27 Sep 2017 16:35:00 -0500 Subject: [PATCH 3/3] percpu: fix iteration to prevent skipping over block The iterator functions pcpu_next_md_free_region and pcpu_next_fit_region use the block offset to determine if they have checked the area in the prior iteration. However, this causes an issue when the block offset is greater than subsequent block contig hints. If within the iterator it moves to check subsequent blocks, it may fail in the second predicate due to the block offset not being cleared. Thus, this causes the allocator to skip over blocks leading to false failures when allocating from the reserved chunk. While this happens in the general case as well, it will only fail if it cannot allocate a new chunk. This patch resets the block offset to 0 to pass the second predicate when checking subseqent blocks within the iterator function. Signed-off-by: Dennis Zhou Reported-and-tested-by: Luis Henriques Signed-off-by: Tejun Heo --- mm/percpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/percpu.c b/mm/percpu.c index 59d44d61f5f1..aa121cef76de 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -353,6 +353,8 @@ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off, block->contig_hint_start); return; } + /* reset to satisfy the second predicate above */ + block_off = 0; *bits = block->right_free; *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; @@ -407,6 +409,8 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, *bit_off = pcpu_block_off_to_off(i, block->first_free); return; } + /* reset to satisfy the second predicate above */ + block_off = 0; *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free, align);