linux/fs/squashfs/decompressor_multi_percpu.c
Julia Cartwright fd56200a16 squashfs: Make use of local lock in multi_cpu decompressor
The squashfs multi CPU decompressor makes use of get_cpu_ptr() to
acquire a pointer to per-CPU data. get_cpu_ptr() implicitly disables
preemption which serializes the access to the per-CPU data.

But decompression can take quite some time depending on the size. The
observed preempt disabled times in real world scenarios went up to 8ms,
causing massive wakeup latencies. This happens on all CPUs as the
decompression is fully parallelized.

Replace the implicit preemption control with an explicit local lock.
This allows RT kernels to substitute it with a real per CPU lock, which
serializes the access but keeps the code section preemptible. On non RT
kernels this maps to preempt_disable() as before, i.e. no functional
change.

[ bigeasy: Use local_lock(), patch description]

Reported-by: Alexander Stein <alexander.stein@systec-electronic.com>
Signed-off-by: Julia Cartwright <julia@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Alexander Stein <alexander.stein@systec-electronic.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20200527201119.1692513-5-bigeasy@linutronix.de
2020-05-28 10:31:10 +02:00

103 lines
2.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013
* Phillip Lougher <phillip@squashfs.org.uk>
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
#include <linux/local_lock.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
/*
* This file implements multi-threaded decompression using percpu
* variables, one thread per cpu core.
*/
struct squashfs_stream {
void *stream;
local_lock_t lock;
};
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
struct squashfs_stream __percpu *percpu;
int err, cpu;
percpu = alloc_percpu(struct squashfs_stream);
if (percpu == NULL)
return ERR_PTR(-ENOMEM);
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
stream->stream = msblk->decompressor->init(msblk, comp_opts);
if (IS_ERR(stream->stream)) {
err = PTR_ERR(stream->stream);
goto out;
}
local_lock_init(&stream->lock);
}
kfree(comp_opts);
return (__force void *) percpu;
out:
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
if (!IS_ERR_OR_NULL(stream->stream))
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
return ERR_PTR(err);
}
void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream;
int cpu;
if (msblk->stream) {
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
}
}
int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
int b, int offset, int length, struct squashfs_page_actor *output)
{
struct squashfs_stream *stream;
int res;
local_lock(&msblk->stream->lock);
stream = this_cpu_ptr(msblk->stream);
res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
offset, length, output);
local_unlock(&msblk->stream->lock);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
msblk->decompressor->name);
return res;
}
int squashfs_max_decompressors(void)
{
return num_possible_cpus();
}