random: reseed in delayed work rather than on-demand

Currently, we reseed when random bytes are requested, if the current
seed is too old. Since random bytes can be requested from all contexts,
including hard IRQ, this means sometimes we wind up adding a bit of
latency to hard IRQ. This was so much of a problem on s390x that now
s390x just doesn't provide its architectural RNG from hard IRQ context,
so we miss out in that case.

Instead, let's just schedule a persistent delayed work, so that the
reseeding and potentially expensive operations will always happen from
process context, reducing unexpected latencies from hard IRQ.

This also has the nice effect of accumulating a transcript of random
inputs over time, since it means that we amass more input values. And it
should make future vDSO integration a bit easier.

Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Juergen Christ <jchrist@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:
Jason A. Donenfeld 2022-11-17 17:47:12 +01:00
parent d7bf7f3b81
commit 9148de3196

View File

@ -182,7 +182,6 @@ enum {
static struct {
u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
unsigned long birth;
unsigned long generation;
spinlock_t lock;
} base_crng = {
@ -200,16 +199,41 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
.lock = INIT_LOCAL_LOCK(crngs.lock),
};
/*
* Return the interval until the next reseeding, which is normally
* CRNG_RESEED_INTERVAL, but during early boot, it is at an interval
* proportional to the uptime.
*/
static unsigned int crng_reseed_interval(void)
{
static bool early_boot = true;
if (unlikely(READ_ONCE(early_boot))) {
time64_t uptime = ktime_get_seconds();
if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
WRITE_ONCE(early_boot, false);
else
return max_t(unsigned int, CRNG_RESEED_START_INTERVAL,
(unsigned int)uptime / 2 * HZ);
}
return CRNG_RESEED_INTERVAL;
}
/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */
static void extract_entropy(void *buf, size_t len);
/* This extracts a new crng key from the input pool. */
static void crng_reseed(void)
static void crng_reseed(struct work_struct *work)
{
static DECLARE_DELAYED_WORK(next_reseed, crng_reseed);
unsigned long flags;
unsigned long next_gen;
u8 key[CHACHA_KEY_SIZE];
/* Immediately schedule the next reseeding, so that it fires sooner rather than later. */
if (likely(system_unbound_wq))
queue_delayed_work(system_unbound_wq, &next_reseed, crng_reseed_interval());
extract_entropy(key, sizeof(key));
/*
@ -224,7 +248,6 @@ static void crng_reseed(void)
if (next_gen == ULONG_MAX)
++next_gen;
WRITE_ONCE(base_crng.generation, next_gen);
WRITE_ONCE(base_crng.birth, jiffies);
if (!static_branch_likely(&crng_is_ready))
crng_init = CRNG_READY;
spin_unlock_irqrestore(&base_crng.lock, flags);
@ -263,26 +286,6 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
memzero_explicit(first_block, sizeof(first_block));
}
/*
* Return the interval until the next reseeding, which is normally
* CRNG_RESEED_INTERVAL, but during early boot, it is at an interval
* proportional to the uptime.
*/
static unsigned int crng_reseed_interval(void)
{
static bool early_boot = true;
if (unlikely(READ_ONCE(early_boot))) {
time64_t uptime = ktime_get_seconds();
if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
WRITE_ONCE(early_boot, false);
else
return max_t(unsigned int, CRNG_RESEED_START_INTERVAL,
(unsigned int)uptime / 2 * HZ);
}
return CRNG_RESEED_INTERVAL;
}
/*
* This function returns a ChaCha state that you may use for generating
* random data. It also returns up to 32 bytes on its own of random data
@ -318,13 +321,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
return;
}
/*
* If the base_crng is old enough, we reseed, which in turn bumps the
* generation counter that we check below.
*/
if (unlikely(time_is_before_jiffies(READ_ONCE(base_crng.birth) + crng_reseed_interval())))
crng_reseed();
local_lock_irqsave(&crngs.lock, flags);
crng = raw_cpu_ptr(&crngs);
@ -698,7 +694,7 @@ static void __cold _credit_init_bits(size_t bits)
} while (!try_cmpxchg(&input_pool.init_bits, &orig, new));
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */
crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
if (static_key_initialized)
execute_in_process_context(crng_set_ready, &set_ready);
wake_up_interruptible(&crng_init_wait);
@ -806,7 +802,7 @@ static int random_pm_notification(struct notifier_block *nb, unsigned long actio
if (crng_ready() && (action == PM_RESTORE_PREPARE ||
(action == PM_POST_SUSPEND && !IS_ENABLED(CONFIG_PM_AUTOSLEEP) &&
!IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP)))) {
crng_reseed();
crng_reseed(NULL);
pr_notice("crng reseeded on system resumption\n");
}
return 0;
@ -850,7 +846,7 @@ void __init random_init_early(const char *command_line)
/* Reseed if already seeded by earlier phases. */
if (crng_ready())
crng_reseed();
crng_reseed(NULL);
else if (trust_cpu)
_credit_init_bits(arch_bits);
}
@ -878,7 +874,7 @@ void __init random_init(void)
/* Reseed if already seeded by earlier phases. */
if (crng_ready())
crng_reseed();
crng_reseed(NULL);
WARN_ON(register_pm_notifier(&pm_notifier));
@ -948,7 +944,7 @@ void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len)
{
add_device_randomness(unique_vm_id, len);
if (crng_ready()) {
crng_reseed();
crng_reseed(NULL);
pr_notice("crng reseeded due to virtual machine fork\n");
}
blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
@ -1470,7 +1466,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (!crng_ready())
return -ENODATA;
crng_reseed();
crng_reseed(NULL);
return 0;
default:
return -EINVAL;