diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d359bcfadd39..5dd660aac0ae 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1029,23 +1029,17 @@ This is a directory, with the following entries: * ``poolsize``: the entropy pool size, in bits; * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum - number of seconds between urandom pool reseeding). + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior. * ``uuid``: a UUID generated every time this is retrieved (this can thus be used to generate UUIDs at will); * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` - are woken up. - -If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` -defined, these additional entries are present: - -* ``add_interrupt_avg_cycles``: the average number of cycles between - interrupts used to feed the pool; - -* ``add_interrupt_avg_deviation``: the standard deviation seen on the - number of cycles between interrupts used to feed the pool. + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. randomize_va_space diff --git a/MAINTAINERS b/MAINTAINERS index ac4508914b3a..7940c41f65d5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16213,6 +16213,7 @@ M: Jason A. 
Donenfeld T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git S: Maintained F: drivers/char/random.c +F: drivers/virt/vmgenid.c RAPIDIO SUBSYSTEM M: Matt Porter diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3db27916256..cfb085de876b 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/mem.c b/drivers/char/mem.c index cc296f0823bd..9f586025dbe6 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -707,7 +707,7 @@ static const struct memdev { [5] = { "zero", 0666, &zero_fops, FMODE_NOWAIT }, [7] = { "full", 0666, &full_fops, 0 }, [8] = { "random", 0666, &random_fops, 0 }, - [9] = { "urandom", 0666, &urandom_fops, 0 }, + [9] = { "urandom", 0666, &random_fops, 0 }, #ifdef CONFIG_PRINTK [11] = { "kmsg", 0644, &kmsg_fops, 0 }, #endif diff --git a/drivers/char/random.c b/drivers/char/random.c index 3404a91edf29..0bdefada7453 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,320 +1,28 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * random.c -- A strong random number generator - * * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. - * * Copyright Matt Mackall , 2003, 2004, 2005 + * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. * - * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All - * rights reserved. + * This driver produces cryptographically secure pseudorandom data. It is divided + * into roughly six sections, each with a section header: * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, and the entire permission notice in its entirety, - * including the disclaimer of warranties. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. + * - Initialization and readiness waiting. + * - Fast key erasure RNG, the "crng". + * - Entropy accumulation and extraction routines. + * - Entropy collection routines. + * - Userspace reader/writer interfaces. + * - Sysctl interface. * - * ALTERNATIVELY, this product may be distributed under the terms of - * the GNU General Public License, in which case the provisions of the GPL are - * required INSTEAD OF the above restrictions. (This clause is - * necessary due to a potential bad interaction between the GPL and - * the restrictions contained in a BSD-style copyright.) - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF - * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - */ - -/* - * (now, with legal B.S. out of the way.....) - * - * This routine gathers environmental noise from device drivers, etc., - * and returns good random numbers, suitable for cryptographic use. 
- * Besides the obvious cryptographic uses, these numbers are also good - * for seeding TCP sequence numbers, and other places where it is - * desirable to have numbers which are not only random, but hard to - * predict by an attacker. - * - * Theory of operation - * =================== - * - * Computers are very predictable devices. Hence it is extremely hard - * to produce truly random numbers on a computer --- as opposed to - * pseudo-random numbers, which can easily generated by using a - * algorithm. Unfortunately, it is very easy for attackers to guess - * the sequence of pseudo-random number generators, and for some - * applications this is not acceptable. So instead, we must try to - * gather "environmental noise" from the computer's environment, which - * must be hard for outside attackers to observe, and use that to - * generate random numbers. In a Unix environment, this is best done - * from inside the kernel. - * - * Sources of randomness from the environment include inter-keyboard - * timings, inter-interrupt timings from some interrupts, and other - * events which are both (a) non-deterministic and (b) hard for an - * outside observer to measure. Randomness from these sources are - * added to an "entropy pool", which is mixed using a CRC-like function. - * This is not cryptographically strong, but it is adequate assuming - * the randomness is not chosen maliciously, and it is fast enough that - * the overhead of doing it on every interrupt is very reasonable. - * As random bytes are mixed into the entropy pool, the routines keep - * an *estimate* of how many bits of randomness have been stored into - * the random number generator's internal state. - * - * When random bytes are desired, they are obtained by taking the BLAKE2s - * hash of the contents of the "entropy pool". The BLAKE2s hash avoids - * exposing the internal state of the entropy pool. 
It is believed to - * be computationally infeasible to derive any useful information - * about the input of BLAKE2s from its output. Even if it is possible to - * analyze BLAKE2s in some clever way, as long as the amount of data - * returned from the generator is less than the inherent entropy in - * the pool, the output data is totally unpredictable. For this - * reason, the routine decreases its internal estimate of how many - * bits of "true randomness" are contained in the entropy pool as it - * outputs random numbers. - * - * If this estimate goes to zero, the routine can still generate - * random numbers; however, an attacker may (at least in theory) be - * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of BLAKE2s, which is - * not believed to be feasible, but there is a remote possibility. - * Nonetheless, these numbers should be useful for the vast majority - * of purposes. - * - * Exported interfaces ---- output - * =============================== - * - * There are four exported interfaces; two for use within the kernel, - * and two for use from userspace. - * - * Exported interfaces ---- userspace output - * ----------------------------------------- - * - * The userspace interfaces are two character devices /dev/random and - * /dev/urandom. /dev/random is suitable for use when very high - * quality randomness is desired (for example, for key generation or - * one-time pads), as it will only return a maximum of the number of - * bits of randomness (as estimated by the random number generator) - * contained in the entropy pool. - * - * The /dev/urandom device does not have this limit, and will return - * as many bytes as are requested. As more and more random bytes are - * requested without giving time for the entropy pool to recharge, - * this will result in random numbers that are merely cryptographically - * strong. For many applications, however, this is acceptable. 
- * - * Exported interfaces ---- kernel output - * -------------------------------------- - * - * The primary kernel interface is - * - * void get_random_bytes(void *buf, int nbytes); - * - * This interface will return the requested number of random bytes, - * and place it in the requested buffer. This is equivalent to a - * read from /dev/urandom. - * - * For less critical applications, there are the functions: - * - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() - * - * These are produced by a cryptographic RNG seeded from get_random_bytes, - * and so do not deplete the entropy pool as much. These are recommended - * for most in-kernel operations *if the result is going to be stored in - * the kernel*. - * - * Specifically, the get_random_int() family do not attempt to do - * "anti-backtracking". If you capture the state of the kernel (e.g. - * by snapshotting the VM), you can figure out previous get_random_int() - * return values. But if the value is stored in the kernel anyway, - * this is not a problem. - * - * It *is* safe to expose get_random_int() output to attackers (e.g. as - * network cookies); given outputs 1..n, it's not feasible to predict - * outputs 0 or n+1. The only concern is an attacker who breaks into - * the kernel later; the get_random_int() engine is not reseeded as - * often as the get_random_bytes() one. - * - * get_random_bytes() is needed for keys that need to stay secret after - * they are erased from the kernel. For example, any key that will - * be wrapped and stored encrypted. And session encryption keys: we'd - * like to know that after the session is closed and the keys erased, - * the plaintext is unrecoverable to someone who recorded the ciphertext. 
- * - * But for network ports/cookies, stack canaries, PRNG seeds, address - * space layout randomization, session *authentication* keys, or other - * applications where the sensitive data is stored in the kernel in - * plaintext for as long as it's sensitive, the get_random_int() family - * is just fine. - * - * Consider ASLR. We want to keep the address space secret from an - * outside attacker while the process is running, but once the address - * space is torn down, it's of no use to an attacker any more. And it's - * stored in kernel data structures as long as it's alive, so worrying - * about an attacker's ability to extrapolate it from the get_random_int() - * CRNG is silly. - * - * Even some cryptographic keys are safe to generate with get_random_int(). - * In particular, keys for SipHash are generally fine. Here, knowledge - * of the key authorizes you to do something to a kernel object (inject - * packets to a network connection, or flood a hash table), and the - * key is stored with the object being protected. Once it goes away, - * we no longer care if anyone knows the key. - * - * prandom_u32() - * ------------- - * - * For even weaker applications, see the pseudorandom generator - * prandom_u32(), prandom_max(), and prandom_bytes(). If the random - * numbers aren't security-critical at all, these are *far* cheaper. - * Useful for self-tests, random error simulation, randomized backoffs, - * and any other application where you trust that nobody is trying to - * maliciously mess with you by guessing the "random" numbers. 
- * - * Exported interfaces ---- input - * ============================== - * - * The current exported interfaces for gathering environmental noise - * from the devices are: - * - * void add_device_randomness(const void *buf, unsigned int size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_interrupt_randomness(int irq); - * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const char *buffer, size_t count, - * size_t entropy); - * void add_bootloader_randomness(const void *buf, unsigned int size); - * - * add_device_randomness() is for adding data to the random pool that - * is likely to differ between two devices (or possibly even per boot). - * This would be things like MAC addresses or serial numbers, or the - * read-out of the RTC. This does *not* add any actual entropy to the - * pool, but it initializes the pool to different values for devices - * that might otherwise be identical and have very little entropy - * available to them (particularly common in the embedded world). - * - * add_input_randomness() uses the input layer interrupt timing, as well as - * the event type information from the hardware. - * - * add_interrupt_randomness() uses the interrupt timing as random - * inputs to the entropy pool. Using the cycle counters and the irq source - * as inputs, it feeds the randomness roughly once a second. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * All of these routines try to estimate how many bits of randomness a - * particular randomness source. They do this by keeping track of the - * first and second order deltas of the event timings. 
- * - * add_hwgenerator_randomness() is for true hardware RNGs, and will credit - * entropy as specified by the caller. If the entropy pool is full it will - * block until more entropy is needed. - * - * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or - * add_device_randomness(), depending on whether or not the configuration - * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. - * - * Ensuring unpredictability at system startup - * ============================================ - * - * When any operating system starts up, it will go through a sequence - * of actions that are fairly predictable by an adversary, especially - * if the start-up does not involve interaction with a human operator. - * This reduces the actual number of bits of unpredictability in the - * entropy pool below the value in entropy_count. In order to - * counteract this effect, it helps to carry information in the - * entropy pool across shut-downs and start-ups. To do this, put the - * following lines an appropriate script which is run during the boot - * sequence: - * - * echo "Initializing random number generator..." - * random_seed=/var/run/random-seed - * # Carry a random seed from start-up to start-up - * # Load and then save the whole entropy pool - * if [ -f $random_seed ]; then - * cat $random_seed >/dev/urandom - * else - * touch $random_seed - * fi - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * and the following lines in an appropriate script which is run as - * the system is shutdown: - * - * # Carry a random seed from shut-down to start-up - * # Save the whole entropy pool - * echo "Saving random seed..." - * random_seed=/var/run/random-seed - * touch $random_seed - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * For example, on most modern systems using the System V init - * scripts, such code fragments would be found in - * /etc/rc.d/init.d/random. 
On older Linux systems, the correct script - * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0. - * - * Effectively, these commands cause the contents of the entropy pool - * to be saved at shut-down time and reloaded into the entropy pool at - * start-up. (The 'dd' in the addition to the bootup script is to - * make sure that /etc/random-seed is different for every start-up, - * even if the system crashes without executing rc.0.) Even with - * complete knowledge of the start-up activities, predicting the state - * of the entropy pool requires knowledge of the previous history of - * the system. - * - * Configuring the /dev/random driver under Linux - * ============================================== - * - * The /dev/random driver under Linux uses minor numbers 8 and 9 of - * the /dev/mem major number (#1). So if your system does not have - * /dev/random and /dev/urandom created already, they can be created - * by using the commands: - * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 - * - * Acknowledgements: - * ================= - * - * Ideas for constructing this random number generator were derived - * from Pretty Good Privacy's random number generator, and from private - * discussions with Phil Karn. Colin Plumb provided a faster random - * number generator, which speed up the mixing function of the entropy - * pool, taken from PGPfone. Dale Worley has also contributed many - * useful ideas and suggestions to improve this driver. - * - * Any flaws in the design are solely my responsibility, and should - * not be attributed to the Phil, Colin, or any of authors of PGP. - * - * Further background information on this topic may be obtained from - * RFC 1750, "Randomness Recommendations for Security", by Donald - * Eastlake, Steve Crocker, and Jeff Schiller. + * The high level overview is that there is one input pool, into which + * various pieces of data are hashed. 
Some of that data is then "credited" as + * having a certain number of bits of entropy. When enough bits of entropy are + * available, the hash is finalized and handed as a key to a stream cipher that + * expands it indefinitely for various consumers. This key is periodically + * refreshed as the various entropy collectors, described below, add data to the + * input pool and credit it. There is currently no Fortuna-like scheduler + * involved, which can lead to malicious entropy sources causing a premature + * reseed, and the entropy estimates are, at best, conservative guesses. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -344,1073 +52,131 @@ #include #include #include +#include #include #include - #include -#include #include #include #include -#define CREATE_TRACE_POINTS -#include - -/* #define ADD_INTERRUPT_BENCH */ - -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. - */ -static int random_write_wakeup_bits = 28 * (1 << 5); - -/* - * Originally, we used a primitive polynomial of degree .poolwords - * over GF(2). The taps for various sizes are defined below. They - * were chosen to be evenly spaced except for the last tap, which is 1 - * to get the twisting happening as fast as possible. +/********************************************************************* * - * For the purposes of better mixing, we use the CRC-32 polynomial as - * well to make a (modified) twisted Generalized Feedback Shift - * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR - * generators. ACM Transactions on Modeling and Computer Simulation - * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted - * GFSR generators II. ACM Transactions on Modeling and Computer - * Simulation 4:254-266) + * Initialization and readiness waiting. * - * Thanks to Colin Plumb for suggesting this. 
+ * Much of the RNG infrastructure is devoted to various dependencies + * being able to wait until the RNG has collected enough entropy and + * is ready for safe consumption. * - * The mixing operation is much less sensitive than the output hash, - * where we use BLAKE2s. All that we want of mixing operation is that - * it be a good non-cryptographic hash; i.e. it not produce collisions - * when fed "random" data of the sort we expect to see. As long as - * the pool state differs for different inputs, we have preserved the - * input entropy and done a good job. The fact that an intelligent - * attacker can construct inputs that will produce controlled - * alterations to the pool's state is not important because we don't - * consider such inputs to contribute any randomness. The only - * property we need with respect to them is that the attacker can't - * increase his/her knowledge of the pool's state. Since all - * additions are reversible (knowing the final state and the input, - * you can reconstruct the initial state), if an attacker has any - * uncertainty about the initial state, he/she can only shuffle that - * uncertainty about, but never cause any collisions (which would - * decrease the uncertainty). - * - * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and - * Videau in their paper, "The Linux Pseudorandom Number Generator - * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their - * paper, they point out that we are not using a true Twisted GFSR, - * since Matsumoto & Kurita used a trinomial feedback polynomial (that - * is, with only three taps, instead of the six that we are using). - * As a result, the resulting polynomial is neither primitive nor - * irreducible, and hence does not have a maximal period over - * GF(2**32). They suggest a slight change to the generator - * polynomial which improves the resulting TGFSR polynomial to be - * irreducible, which we have made here. 
- */ -enum poolinfo { - POOL_WORDS = 128, - POOL_WORDMASK = POOL_WORDS - 1, - POOL_BYTES = POOL_WORDS * sizeof(u32), - POOL_BITS = POOL_BYTES * 8, - POOL_BITSHIFT = ilog2(POOL_BITS), - - /* To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. */ - POOL_ENTROPY_SHIFT = 3, -#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT) - POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT, - - /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - POOL_TAP1 = 104, - POOL_TAP2 = 76, - POOL_TAP3 = 51, - POOL_TAP4 = 25, - POOL_TAP5 = 1, - - EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2 -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static struct fasync_struct *fasync; - -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); - -struct crng_state { - u32 state[16]; - unsigned long init_time; - spinlock_t lock; -}; - -static struct crng_state primary_crng = { - .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), - .state[0] = CHACHA_CONSTANT_EXPA, - .state[1] = CHACHA_CONSTANT_ND_3, - .state[2] = CHACHA_CONSTANT_2_BY, - .state[3] = CHACHA_CONSTANT_TE_K, -}; + *********************************************************************/ /* * crng_init = 0 --> Uninitialized * 1 --> Initialized * 2 --> Initialized from input_pool * - * crng_init is protected by primary_crng->lock, and only increases + * crng_init is protected by base_crng->lock, and only increases * its value (from 0->1->2). 
*/ static int crng_init = 0; -static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) -static int crng_init_cnt = 0; -static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]); -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used); -static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, int nbytes); +/* Various types of waiters for crng_init->2 transition. */ +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +static struct fasync_struct *fasync; +static DEFINE_SPINLOCK(random_ready_chain_lock); +static RAW_NOTIFIER_HEAD(random_ready_chain); +/* Control how we warn userspace. */ static struct ratelimit_state unseeded_warning = RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); -static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); - static int ratelimit_disable __read_mostly; - module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); -/********************************************************************** +/* + * Returns whether or not the input pool has been seeded and thus guaranteed + * to supply cryptographically secure random numbers. This applies to + * get_random_bytes() and get_random_{u32,u64,int,long}(). * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. - * - **********************************************************************/ + * Returns: true if the input pool has been seeded. + * false if the input pool has not been seeded. 
+ */ +bool rng_is_initialized(void) +{ + return crng_ready(); +} +EXPORT_SYMBOL(rng_is_initialized); -static u32 input_pool_data[POOL_WORDS] __latent_entropy; - -static struct { - spinlock_t lock; - u16 add_ptr; - u16 input_rotate; - int entropy_count; -} input_pool = { - .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), -}; - -static ssize_t extract_entropy(void *buf, size_t nbytes, int min); -static ssize_t _extract_entropy(void *buf, size_t nbytes); - -static void crng_reseed(struct crng_state *crng, bool use_input_pool); - -static const u32 twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; +/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ +static void try_to_generate_entropy(void); /* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. + * Wait for the input pool to be seeded and thus guaranteed to supply + * cryptographically secure random numbers. This applies to + * get_random_bytes() and get_random_{u32,u64,int,long}(). Using any + * of these functions without first calling this function means that + * the returned numbers might not be cryptographically secure. * - * The pool is stirred with a primitive polynomial of the appropriate - * degree, and then twisted. We twist by three bits at a time because - * it's cheap to do so and helps slightly in the expected case where - * the entropy is concentrated in the low-order bits. + * Returns: 0 if the input pool has been seeded. + * -ERESTARTSYS if the function was interrupted by a signal. 
*/ -static void _mix_pool_bytes(const void *in, int nbytes) +int wait_for_random_bytes(void) { - unsigned long i; - int input_rotate; - const u8 *bytes = in; - u32 w; + while (!crng_ready()) { + int ret; - input_rotate = input_pool.input_rotate; - i = input_pool.add_ptr; - - /* mix one byte at a time to simplify size handling and churn faster */ - while (nbytes--) { - w = rol32(*bytes++, input_rotate); - i = (i - 1) & POOL_WORDMASK; - - /* XOR in the various taps */ - w ^= input_pool_data[i]; - w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK]; - w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK]; - - /* Mix the result back in with a twist */ - input_pool_data[i] = (w >> 3) ^ twist_table[w & 7]; - - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. - */ - input_rotate = (input_rotate + (i ? 7 : 14)) & 31; + try_to_generate_entropy(); + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; } - - input_pool.input_rotate = input_rotate; - input_pool.add_ptr = i; + return 0; } +EXPORT_SYMBOL(wait_for_random_bytes); -static void __mix_pool_bytes(const void *in, int nbytes) -{ - trace_mix_pool_bytes_nolock(nbytes, _RET_IP_); - _mix_pool_bytes(in, nbytes); -} - -static void mix_pool_bytes(const void *in, int nbytes) +/* + * Add a callback function that will be invoked when the input + * pool is initialised. 
+ * + * returns: 0 if callback is successfully added + * -EALREADY if pool is already initialised (callback not called) + */ +int register_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; + int ret = -EALREADY; - trace_mix_pool_bytes(nbytes, _RET_IP_); - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); - spin_unlock_irqrestore(&input_pool.lock, flags); + if (crng_ready()) + return ret; + + spin_lock_irqsave(&random_ready_chain_lock, flags); + if (!crng_ready()) + ret = raw_notifier_chain_register(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; } -struct fast_pool { - u32 pool[4]; - unsigned long last; - u16 reg_idx; - u8 count; -}; - /* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. + * Delete a previously registered readiness callback function. 
*/ -static void fast_mix(struct fast_pool *f) +int unregister_random_ready_notifier(struct notifier_block *nb) { - u32 a = f->pool[0], b = f->pool[1]; - u32 c = f->pool[2], d = f->pool[3]; + unsigned long flags; + int ret; - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - f->pool[0] = a; f->pool[1] = b; - f->pool[2] = c; f->pool[3] = d; - f->count++; + spin_lock_irqsave(&random_ready_chain_lock, flags); + ret = raw_notifier_chain_unregister(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; } static void process_random_ready_list(void) { unsigned long flags; - struct random_ready_callback *rdy, *tmp; - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; - - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); -} - -/* - * Credit (or debit) the entropy store with n bits of entropy. - * Use credit_entropy_bits_safe() if the value comes from userspace - * or otherwise should be checked for extreme values. - */ -static void credit_entropy_bits(int nbits) -{ - int entropy_count, entropy_bits, orig; - int nfrac = nbits << POOL_ENTROPY_SHIFT; - - /* Ensure that the multiplication can avoid being 64 bits wide. */ - BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31); - - if (!nbits) - return; - -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (nfrac < 0) { - /* Debit */ - entropy_count += nfrac; - } else { - /* - * Credit: we have to account for the possibility of - * overwriting already present entropy. 
Even in the - * ideal case of pure Shannon entropy, new contributions - * approach the full value asymptotically: - * - * entropy <- entropy + (pool_size - entropy) * - * (1 - exp(-add_entropy/pool_size)) - * - * For add_entropy <= pool_size/2 then - * (1 - exp(-add_entropy/pool_size)) >= - * (add_entropy/pool_size)*0.7869... - * so we can approximate the exponential with - * 3/4*add_entropy/pool_size and still be on the - * safe side by adding at most pool_size/2 at a time. - * - * The use of pool_size-2 in the while statement is to - * prevent rounding artifacts from making the loop - * arbitrarily long; this limits the loop to log2(pool_size)*2 - * turns no matter how large nbits is. - */ - int pnfrac = nfrac; - const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2; - /* The +2 corresponds to the /4 in the denominator */ - - do { - unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2); - unsigned int add = - ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s; - - entropy_count += add; - pnfrac -= anfrac; - } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac)); - } - - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: count %d\n", entropy_count); - entropy_count = 0; - } else if (entropy_count > POOL_FRACBITS) - entropy_count = POOL_FRACBITS; - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_); - - entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT; - if (crng_init < 2 && entropy_bits >= 128) - crng_reseed(&primary_crng, true); -} - -static int credit_entropy_bits_safe(int nbits) -{ - if (nbits < 0) - return -EINVAL; - - /* Cap the value to avoid overflows */ - nbits = min(nbits, POOL_BITS); - - credit_entropy_bits(nbits); - return 0; -} - -/********************************************************************* - * - * CRNG using CHACHA20 - * - *********************************************************************/ - 
-#define CRNG_RESEED_INTERVAL (300 * HZ) - -static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); - -/* - * Hack to deal with crazy userspace progams when they are all trying - * to access /dev/urandom in parallel. The programs are almost - * certainly doing something terribly wrong, but we'll work around - * their brain damage. - */ -static struct crng_state **crng_node_pool __read_mostly; - -static void invalidate_batched_entropy(void); -static void numa_crng_init(void); - -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static int __init parse_trust_cpu(char *arg) -{ - return kstrtobool(arg, &trust_cpu); -} -early_param("random.trust_cpu", parse_trust_cpu); - -static bool crng_init_try_arch(struct crng_state *crng) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - crng->state[i] ^= rv; - } - - return arch_init; -} - -static bool __init crng_init_try_arch_early(void) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - primary_crng.state[i] ^= rv; - } - - return arch_init; -} - -static void crng_initialize_secondary(struct crng_state *crng) -{ - chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(u32) * 12); - crng_init_try_arch(crng); - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - -static void __init crng_initialize_primary(void) -{ - _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); - if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } - primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - 
-static void crng_finalize_init(void) -{ - if (!system_wq) { - /* We can't call numa_crng_init until we have workqueues, - * so mark this for processing later. */ - crng_need_final_init = true; - return; - } - - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - crng_need_final_init = false; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } -} - -static void do_numa_crng_init(struct work_struct *work) -{ - int i; - struct crng_state *crng; - struct crng_state **pool; - - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize_secondary(crng); - pool[i] = crng; - } - /* pairs with READ_ONCE() in select_crng() */ - if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { - for_each_node(i) - kfree(pool[i]); - kfree(pool); - } -} - -static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); - -static void numa_crng_init(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) - schedule_work(&numa_crng_init_work); -} - -static struct crng_state *select_crng(void) -{ - if (IS_ENABLED(CONFIG_NUMA)) { - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; - } - - return &primary_crng; -} - -/* - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. 
Returns the number of - * bytes processed from cp. - */ -static size_t crng_fast_load(const u8 *cp, size_t len) -{ - unsigned long flags; - u8 *p; - size_t ret = 0; - - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; - } - p = (u8 *)&primary_crng.state[4]; - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; - cp++; crng_init_cnt++; len--; ret++; - } - spin_unlock_irqrestore(&primary_crng.lock, flags); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - invalidate_batched_entropy(); - crng_init = 1; - pr_notice("fast init done\n"); - } - return ret; -} - -/* - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So we do something more comprehensive which is guaranteed to touch - * all of the primary_crng's state, and which uses a LFSR with a - * period of 255 as part of the mixing algorithm. Finally, we do - * *not* advance crng_init_cnt since buffer we may get may be something - * like a fixed DMI table (for example), which might very well be - * unique to the machine, but is otherwise unvarying. 
- */ -static int crng_slow_load(const u8 *cp, size_t len) -{ - unsigned long flags; - static u8 lfsr = 1; - u8 tmp; - unsigned int i, max = CHACHA_KEY_SIZE; - const u8 *src_buf = cp; - u8 *dest_buf = (u8 *)&primary_crng.state[4]; - - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; - } - if (len > max) - max = len; - - for (i = 0; i < max; i++) { - tmp = lfsr; - lfsr >>= 1; - if (tmp & 1) - lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA_KEY_SIZE]; - dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; - lfsr += (tmp << 3) | (tmp >> 5); - } - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 1; -} - -static void crng_reseed(struct crng_state *crng, bool use_input_pool) -{ - unsigned long flags; - int i, num; - union { - u8 block[CHACHA_BLOCK_SIZE]; - u32 key[8]; - } buf; - - if (use_input_pool) { - num = extract_entropy(&buf, 32, 16); - if (num == 0) - return; - } else { - _extract_crng(&primary_crng, buf.block); - _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA_KEY_SIZE); - } - spin_lock_irqsave(&crng->lock, flags); - for (i = 0; i < 8; i++) { - unsigned long rv; - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - crng->state[i + 4] ^= buf.key[i] ^ rv; - } - memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(crng->init_time, jiffies); - spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) - crng_finalize_init(); -} - -static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) -{ - unsigned long flags, init_time; - - if (crng_ready()) { - init_time = READ_ONCE(crng->init_time); - if (time_after(READ_ONCE(crng_global_init_time), init_time) || - time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng); - } - spin_lock_irqsave(&crng->lock, flags); - chacha20_block(&crng->state[0], out); - if 
(crng->state[12] == 0) - crng->state[13]++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]) -{ - _extract_crng(select_crng(), out); -} - -/* - * Use the leftover bytes from the CRNG block output (if there is - * enough) to mutate the CRNG key to provide backtracking protection. - */ -static void _crng_backtrack_protect(struct crng_state *crng, - u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - unsigned long flags; - u32 *s, *d; - int i; - - used = round_up(used, sizeof(u32)); - if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { - extract_crng(tmp); - used = 0; - } - spin_lock_irqsave(&crng->lock, flags); - s = (u32 *)&tmp[used]; - d = &crng->state[4]; - for (i = 0; i < 8; i++) - *d++ ^= *s++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - _crng_backtrack_protect(select_crng(), tmp, used); -} - -static ssize_t extract_crng_user(void __user *buf, size_t nbytes) -{ - ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - int large_request = (nbytes > 256); - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; - break; - } - schedule(); - } - - extract_crng(tmp); - i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { - ret = -EFAULT; - break; - } - - nbytes -= i; - buf += i; - ret += i; - } - crng_backtrack_protect(tmp, i); - - /* Wipe data just written to memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; -} - -/********************************************************************* - * - * Entropy input management - * - *********************************************************************/ - -/* There is one of these per entropy source */ -struct timer_rand_state { - cycles_t last_time; - long last_delta, last_delta2; -}; - -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; - -/* - * Add device- 
or boot-specific data to the input pool to help - * initialize it. - * - * None of this adds any entropy; it is meant to avoid the problem of - * the entropy pool having similar initial state across largely - * identical devices. - */ -void add_device_randomness(const void *buf, unsigned int size) -{ - unsigned long time = random_get_entropy() ^ jiffies; - unsigned long flags; - - if (!crng_ready() && size) - crng_slow_load(buf, size); - - trace_add_device_randomness(size, _RET_IP_); - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(buf, size); - _mix_pool_bytes(&time, sizeof(time)); - spin_unlock_irqrestore(&input_pool.lock, flags); -} -EXPORT_SYMBOL(add_device_randomness); - -static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; - -/* - * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. - * - */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned num) -{ - struct { - long jiffies; - unsigned int cycles; - unsigned int num; - } sample; - long delta, delta2, delta3; - - sample.jiffies = jiffies; - sample.cycles = random_get_entropy(); - sample.num = num; - mix_pool_bytes(&sample, sizeof(sample)); - - /* - * Calculate number of bits of randomness we probably added. - * We take into account the first, second and third-order deltas - * in order to make our estimate. 
- */ - delta = sample.jiffies - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, sample.jiffies); - - delta2 = delta - READ_ONCE(state->last_delta); - WRITE_ONCE(state->last_delta, delta); - - delta3 = delta2 - READ_ONCE(state->last_delta2); - WRITE_ONCE(state->last_delta2, delta2); - - if (delta < 0) - delta = -delta; - if (delta2 < 0) - delta2 = -delta2; - if (delta3 < 0) - delta3 = -delta3; - if (delta > delta2) - delta = delta2; - if (delta > delta3) - delta = delta3; - - /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy estimate to 12 bits. - */ - credit_entropy_bits(min_t(int, fls(delta >> 1), 11)); -} - -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) -{ - static unsigned char last_value; - - /* ignore autorepeat and the like */ - if (value == last_value) - return; - - last_value = value; - add_timer_randomness(&input_timer_state, - (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(POOL_ENTROPY_BITS()); -} -EXPORT_SYMBOL_GPL(add_input_randomness); - -static DEFINE_PER_CPU(struct fast_pool, irq_randomness); - -#ifdef ADD_INTERRUPT_BENCH -static unsigned long avg_cycles, avg_deviation; - -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT - 1)) - -static void add_interrupt_bench(cycles_t start) -{ - long delta = random_get_entropy() - start; - - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; -} -#else -#define add_interrupt_bench(x) -#endif - -static u32 get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - u32 *ptr = (u32 *)regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(u32)) - idx = 0; - ptr += idx++; 
- WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - -void add_interrupt_randomness(int irq) -{ - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); - u32 c_high, j_high; - u64 ip; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; - j_high = (sizeof(now) > 4) ? now >> 32 : 0; - fast_pool->pool[0] ^= cycles ^ j_high ^ irq; - fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= - (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs); - - fast_mix(fast_pool); - add_interrupt_bench(cycles); - - if (unlikely(crng_init == 0)) { - if ((fast_pool->count >= 64) && - crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) { - fast_pool->count = 0; - fast_pool->last = now; - } - return; - } - - if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) - return; - - if (!spin_trylock(&input_pool.lock)) - return; - - fast_pool->last = now; - __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool)); - spin_unlock(&input_pool.lock); - - fast_pool->count = 0; - - /* award one bit for the contents of the fast pool */ - credit_entropy_bits(1); -} -EXPORT_SYMBOL_GPL(add_interrupt_randomness); - -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* first major is 1, so we get >= 0x200 here */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS()); -} -EXPORT_SYMBOL_GPL(add_disk_randomness); -#endif - -/********************************************************************* - * - * Entropy extraction routines - * - *********************************************************************/ - -/* - * This function decides how many bytes to actually take from the - * 
given pool, and also debits the entropy count accordingly. - */ -static size_t account(size_t nbytes, int min) -{ - int entropy_count, orig; - size_t ibytes, nfrac; - - BUG_ON(input_pool.entropy_count > POOL_FRACBITS); - - /* Can we pull enough? */ -retry: - entropy_count = orig = READ_ONCE(input_pool.entropy_count); - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: count %d\n", entropy_count); - entropy_count = 0; - } - - /* never pull more than available */ - ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3)); - if (ibytes < min) - ibytes = 0; - nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3); - if ((size_t)entropy_count > nfrac) - entropy_count -= nfrac; - else - entropy_count = 0; - - if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_debit_entropy(8 * ibytes); - if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - - return ibytes; -} - -/* - * This function does the actual extraction for extract_entropy. - * - * Note: we assume that .poolwords is a multiple of 16 words. - */ -static void extract_buf(u8 *out) -{ - struct blake2s_state state __aligned(__alignof__(unsigned long)); - u8 hash[BLAKE2S_HASH_SIZE]; - unsigned long *salt; - unsigned long flags; - - blake2s_init(&state, sizeof(hash)); - - /* - * If we have an architectural hardware random number - * generator, use it for BLAKE2's salt & personal fields. 
- */ - for (salt = (unsigned long *)&state.h[4]; - salt < (unsigned long *)&state.h[8]; ++salt) { - unsigned long v; - if (!arch_get_random_long(&v)) - break; - *salt ^= v; - } - - /* Generate a hash across the pool */ - spin_lock_irqsave(&input_pool.lock, flags); - blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES); - blake2s_final(&state, hash); /* final zeros out state */ - - /* - * We mix the hash back into the pool to prevent backtracking - * attacks (where the attacker knows the state of the pool - * plus the current outputs, and attempts to find previous - * outputs), unless the hash function can be inverted. By - * mixing at least a hash worth of hash data back, we make - * brute-forcing the feedback as hard as brute-forcing the - * hash. - */ - __mix_pool_bytes(hash, sizeof(hash)); - spin_unlock_irqrestore(&input_pool.lock, flags); - - /* Note that EXTRACT_SIZE is half of hash size here, because above - * we've dumped the full length back into mixer. By reducing the - * amount that we emit, we retain a level of forward secrecy. - */ - memcpy(out, hash, EXTRACT_SIZE); - memzero_explicit(hash, sizeof(hash)); -} - -static ssize_t _extract_entropy(void *buf, size_t nbytes) -{ - ssize_t ret = 0, i; - u8 tmp[EXTRACT_SIZE]; - - while (nbytes) { - extract_buf(tmp); - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); - nbytes -= i; - buf += i; - ret += i; - } - - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; -} - -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a buffer. - * - * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding. 
- */ -static ssize_t extract_entropy(void *buf, size_t nbytes, int min) -{ - trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_); - nbytes = account(nbytes, min); - return _extract_entropy(buf, nbytes); + spin_lock_irqsave(&random_ready_chain_lock, flags); + raw_notifier_call_chain(&random_ready_chain, 0, NULL); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); } #define warn_unseeded_randomness(previous) \ @@ -1436,6 +202,305 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void func_name, caller, crng_init); } + +/********************************************************************* + * + * Fast key erasure RNG, the "crng". + * + * These functions expand entropy from the entropy extractor into + * long streams for external consumption using the "fast key erasure" + * RNG described at https://blog.cr.yp.to/20170723-random.html. + * + * There are a few exported interfaces for use by other drivers: + * + * void get_random_bytes(void *buf, size_t nbytes) + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() + * + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. The returned numbers are + * the same as those of getrandom(0). The integer family of functions may + * be higher performance for one-off random integers, because they do a + * bit of buffering and do not invoke reseeding.
+ * + *********************************************************************/ + +enum { + CRNG_RESEED_INTERVAL = 300 * HZ, + CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE +}; + +static struct { + u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); + unsigned long birth; + unsigned long generation; + spinlock_t lock; +} base_crng = { + .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) +}; + +struct crng { + u8 key[CHACHA_KEY_SIZE]; + unsigned long generation; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct crng, crngs) = { + .generation = ULONG_MAX, + .lock = INIT_LOCAL_LOCK(crngs.lock), +}; + +/* Used by crng_reseed() to extract a new seed from the input pool. */ +static bool drain_entropy(void *buf, size_t nbytes, bool force); + +/* + * This extracts a new crng key from the input pool, but only if there is a + * sufficient amount of entropy available or force is true, in order to + * mitigate bruteforcing of newly added bits. + */ +static void crng_reseed(bool force) +{ + unsigned long flags; + unsigned long next_gen; + u8 key[CHACHA_KEY_SIZE]; + bool finalize_init = false; + + /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */ + if (!drain_entropy(key, sizeof(key), force)) + return; + + /* + * We copy the new key into the base_crng, overwriting the old one, + * and update the generation counter. We avoid hitting ULONG_MAX, + * because the per-cpu crngs are initialized to ULONG_MAX, so this + * forces new CPUs that come online to always initialize. 
+ */ + spin_lock_irqsave(&base_crng.lock, flags); + memcpy(base_crng.key, key, sizeof(base_crng.key)); + next_gen = base_crng.generation + 1; + if (next_gen == ULONG_MAX) + ++next_gen; + WRITE_ONCE(base_crng.generation, next_gen); + WRITE_ONCE(base_crng.birth, jiffies); + if (!crng_ready()) { + crng_init = 2; + finalize_init = true; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); + if (finalize_init) { + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + } +} + +/* + * This generates a ChaCha block using the provided key, and then + * immediately overwrites that key with half the block. It returns + * the resultant ChaCha state to the user, along with the second + * half of the block containing 32 bytes of random data that may + * be used; random_data_len may not be greater than 32. + */ +static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], + u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) +{ + u8 first_block[CHACHA_BLOCK_SIZE]; + + BUG_ON(random_data_len > 32); + + chacha_init_consts(chacha_state); + memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); + memset(&chacha_state[12], 0, sizeof(u32) * 4); + chacha20_block(chacha_state, first_block); + + memcpy(key, first_block, CHACHA_KEY_SIZE); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memzero_explicit(first_block, sizeof(first_block)); +} + +/* + * Return whether the crng seed is considered to be sufficiently + * old that a reseeding might be attempted. This happens if the last + * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at + * an interval proportional to the uptime.
+ */ +static bool crng_has_old_seed(void) +{ + static bool early_boot = true; + unsigned long interval = CRNG_RESEED_INTERVAL; + + if (unlikely(READ_ONCE(early_boot))) { + time64_t uptime = ktime_get_seconds(); + if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) + WRITE_ONCE(early_boot, false); + else + interval = max_t(unsigned int, 5 * HZ, + (unsigned int)uptime / 2 * HZ); + } + return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); +} + +/* + * This function returns a ChaCha state that you may use for generating + * random data. It also returns up to 32 bytes on its own of random data + * that may be used; random_data_len may not be greater than 32. + */ +static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) +{ + unsigned long flags; + struct crng *crng; + + BUG_ON(random_data_len > 32); + + /* + * For the fast path, we check whether we're ready, unlocked first, and + * then re-check once locked later. In the case where we're really not + * ready, we do fast key erasure with the base_crng directly, because + * this is what crng_pre_init_inject() mutates during early init. + */ + if (!crng_ready()) { + bool ready; + + spin_lock_irqsave(&base_crng.lock, flags); + ready = crng_ready(); + if (!ready) + crng_fast_key_erasure(base_crng.key, chacha_state, + random_data, random_data_len); + spin_unlock_irqrestore(&base_crng.lock, flags); + if (!ready) + return; + } + + /* + * If the base_crng is old enough, we try to reseed, which in turn + * bumps the generation counter that we check below. + */ + if (unlikely(crng_has_old_seed())) + crng_reseed(false); + + local_lock_irqsave(&crngs.lock, flags); + crng = raw_cpu_ptr(&crngs); + + /* + * If our per-cpu crng is older than the base_crng, then it means + * somebody reseeded the base_crng. In that case, we do fast key + * erasure on the base_crng, and use its output as the new key + * for our per-cpu crng. This brings us up to date with base_crng. 
+ */ + if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { + spin_lock(&base_crng.lock); + crng_fast_key_erasure(base_crng.key, chacha_state, + crng->key, sizeof(crng->key)); + crng->generation = base_crng.generation; + spin_unlock(&base_crng.lock); + } + + /* + * Finally, when we've made it this far, our per-cpu crng has an up + * to date key, and we can do fast key erasure with it to produce + * some random data and a ChaCha state for the caller. All other + * branches of this function are "unlikely", so most of the time we + * should wind up here immediately. + */ + crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); + local_unlock_irqrestore(&crngs.lock, flags); +} + +/* + * This function is for crng_init == 0 only. It loads entropy directly + * into the crng's key, without going through the input pool. It is, + * generally speaking, not very safe, but we use this only at early + * boot time when it's better to have something there rather than + * nothing. + * + * If account is set, then the crng_init_cnt counter is incremented. + * This shouldn't be set by functions like add_device_randomness(), + * where we can't trust the buffer passed to it is guaranteed to be + * unpredictable (so it might not have any entropy at all). + * + * Returns the number of bytes processed from input, which is bounded + * by CRNG_INIT_CNT_THRESH if account is true. 
+ */ +static size_t crng_pre_init_inject(const void *input, size_t len, bool account) +{ + static int crng_init_cnt = 0; + struct blake2s_state hash; + unsigned long flags; + + blake2s_init(&hash, sizeof(base_crng.key)); + + spin_lock_irqsave(&base_crng.lock, flags); + if (crng_init != 0) { + spin_unlock_irqrestore(&base_crng.lock, flags); + return 0; + } + + if (account) + len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); + + blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); + blake2s_update(&hash, input, len); + blake2s_final(&hash, base_crng.key); + + if (account) { + crng_init_cnt += len; + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + ++base_crng.generation; + crng_init = 1; + } + } + + spin_unlock_irqrestore(&base_crng.lock, flags); + + if (crng_init == 1) + pr_notice("fast init done\n"); + + return len; +} + +static void _get_random_bytes(void *buf, size_t nbytes) +{ + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 tmp[CHACHA_BLOCK_SIZE]; + size_t len; + + if (!nbytes) + return; + + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, buf, len); + nbytes -= len; + buf += len; + + while (nbytes) { + if (nbytes < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + nbytes -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); +} + /* * This function is the exported kernel interface. It returns some * number of good random numbers, suitable for key generation, seeding @@ -1446,28 +511,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. 
*/ -static void _get_random_bytes(void *buf, int nbytes) -{ - u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - - trace_get_random_bytes(nbytes, _RET_IP_); - - while (nbytes >= CHACHA_BLOCK_SIZE) { - extract_crng(buf); - buf += CHACHA_BLOCK_SIZE; - nbytes -= CHACHA_BLOCK_SIZE; - } - - if (nbytes > 0) { - extract_crng(tmp); - memcpy(buf, tmp, nbytes); - crng_backtrack_protect(tmp, nbytes); - } else - crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); - memzero_explicit(tmp, sizeof(tmp)); -} - -void get_random_bytes(void *buf, int nbytes) +void get_random_bytes(void *buf, size_t nbytes) { static void *previous; @@ -1476,619 +520,80 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes); -/* - * Each time the timer fires, we expect that we got an unpredictable - * jump in the cycle counter. Even if the timer is running on another - * CPU, the timer activity will be touching the stack of the CPU that is - * generating entropy.. - * - * Note that we don't re-arm the timer in the timer itself - we are - * happy to be scheduled away, since that just makes the load more - * complex, but we do not want the timer to keep ticking unless the - * entropy loop is running. - * - * So the re-arming always happens in the entropy loop itself. - */ -static void entropy_timer(struct timer_list *t) +static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - credit_entropy_bits(1); -} + bool large_request = nbytes > 256; + ssize_t ret = 0; + size_t len; + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 output[CHACHA_BLOCK_SIZE]; -/* - * If we have an actual cycle counter, see if we can - * generate enough entropy with timing noise - */ -static void try_to_generate_entropy(void) -{ - struct { - unsigned long now; - struct timer_list timer; - } stack; - - stack.now = random_get_entropy(); - - /* Slow counter - or none. 
Don't even bother */ - if (stack.now == random_get_entropy()) - return; - - timer_setup_on_stack(&stack.timer, entropy_timer, 0); - while (!crng_ready()) { - if (!timer_pending(&stack.timer)) - mod_timer(&stack.timer, jiffies + 1); - mix_pool_bytes(&stack.now, sizeof(stack.now)); - schedule(); - stack.now = random_get_entropy(); - } - - del_timer_sync(&stack.timer); - destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&stack.now, sizeof(stack.now)); -} - -/* - * Wait for the urandom pool to be seeded and thus guaranteed to supply - * cryptographically secure random numbers. This applies to: the /dev/urandom - * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} - * family of functions. Using any of these functions without first calling - * this function forfeits the guarantee of security. - * - * Returns: 0 if the urandom pool has been seeded. - * -ERESTARTSYS if the function was interrupted by a signal. - */ -int wait_for_random_bytes(void) -{ - if (likely(crng_ready())) + if (!nbytes) return 0; - do { - int ret; - ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); - if (ret) - return ret > 0 ? 0 : ret; + len = min_t(size_t, 32, nbytes); + crng_make_state(chacha_state, output, len); - try_to_generate_entropy(); - } while (!crng_ready()); + if (copy_to_user(buf, output, len)) + return -EFAULT; + nbytes -= len; + buf += len; + ret += len; - return 0; -} -EXPORT_SYMBOL(wait_for_random_bytes); + while (nbytes) { + if (large_request && need_resched()) { + if (signal_pending(current)) + break; + schedule(); + } -/* - * Returns whether or not the urandom pool has been seeded and thus guaranteed - * to supply cryptographically secure random numbers. This applies to: the - * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, - * ,u64,int,long} family of functions. - * - * Returns: true if the urandom pool has been seeded. - * false if the urandom pool has not been seeded. 
- */ -bool rng_is_initialized(void) -{ - return crng_ready(); -} -EXPORT_SYMBOL(rng_is_initialized); + chacha20_block(chacha_state, output); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; -/* - * Add a callback function that will be invoked when the nonblocking - * pool is initialised. - * - * returns: 0 if callback is successfully added - * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive - */ -int add_random_ready_callback(struct random_ready_callback *rdy) -{ - struct module *owner; - unsigned long flags; - int err = -EALREADY; - - if (crng_ready()) - return err; - - owner = rdy->owner; - if (!try_module_get(owner)) - return -ENOENT; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (crng_ready()) - goto out; - - owner = NULL; - - list_add(&rdy->list, &random_ready_list); - err = 0; - -out: - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); - - return err; -} -EXPORT_SYMBOL(add_random_ready_callback); - -/* - * Delete a previously registered readiness callback function. - */ -void del_random_ready_callback(struct random_ready_callback *rdy) -{ - unsigned long flags; - struct module *owner = NULL; - - spin_lock_irqsave(&random_ready_list_lock, flags); - if (!list_empty(&rdy->list)) { - list_del_init(&rdy->list); - owner = rdy->owner; - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); - - module_put(owner); -} -EXPORT_SYMBOL(del_random_ready_callback); - -/* - * This function will use the architecture-specific hardware random - * number generator if it is available. The arch-specific hw RNG will - * almost certainly be faster than what we can do in software, but it - * is impossible to verify that it is implemented securely (as - * opposed, to, say, the AES encryption of a sequence number using a - * key known by the NSA). 
So it's useful if we need the speed, but - * only if we're willing to trust the hardware manufacturer not to - * have put in a back door. - * - * Return number of bytes filled in. - */ -int __must_check get_random_bytes_arch(void *buf, int nbytes) -{ - int left = nbytes; - u8 *p = buf; - - trace_get_random_bytes_arch(left, _RET_IP_); - while (left) { - unsigned long v; - int chunk = min_t(int, left, sizeof(unsigned long)); - - if (!arch_get_random_long(&v)) + len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); + if (copy_to_user(buf, output, len)) { + ret = -EFAULT; break; + } - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; + nbytes -= len; + buf += len; + ret += len; } - return nbytes - left; -} -EXPORT_SYMBOL(get_random_bytes_arch); - -/* - * init_std_data - initialize pool with system data - * - * This function clears the pool's entropy count and mixes some system - * data into the pool to prepare it for use. The pool is not cleared - * as that can only decrease the entropy in the pool. - */ -static void __init init_std_data(void) -{ - int i; - ktime_t now = ktime_get_real(); - unsigned long rv; - - mix_pool_bytes(&now, sizeof(now)); - for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - mix_pool_bytes(&rv, sizeof(rv)); - } - mix_pool_bytes(utsname(), sizeof(*(utsname()))); -} - -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. 
- */ -int __init rand_initialize(void) -{ - init_std_data(); - if (crng_need_final_init) - crng_finalize_init(); - crng_initialize_primary(); - crng_global_init_time = jiffies; - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } - return 0; -} - -#ifdef CONFIG_BLOCK -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - -static ssize_t urandom_read_nowarn(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) -{ - int ret; - - nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3)); - ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS()); + memzero_explicit(chacha_state, sizeof(chacha_state)); + memzero_explicit(output, sizeof(output)); return ret; } -static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - static int maxwarn = 10; - - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); - } - - return urandom_read_nowarn(file, buf, nbytes, ppos); -} - -static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - int ret; - - ret = wait_for_random_bytes(); - if (ret != 0) - return ret; - return urandom_read_nowarn(file, buf, nbytes, ppos); -} - -static __poll_t random_poll(struct file *file, poll_table *wait) -{ - __poll_t mask; - - poll_wait(file, &crng_init_wait, wait); - poll_wait(file, &random_write_wait, wait); - mask = 0; - if (crng_ready()) - mask |= EPOLLIN | EPOLLRDNORM; - if (POOL_ENTROPY_BITS() < random_write_wakeup_bits) - mask |= EPOLLOUT | EPOLLWRNORM; - return mask; -} - 
-static int write_pool(const char __user *buffer, size_t count) -{ - size_t bytes; - u32 t, buf[16]; - const char __user *p = buffer; - - while (count > 0) { - int b, i = 0; - - bytes = min(count, sizeof(buf)); - if (copy_from_user(&buf, p, bytes)) - return -EFAULT; - - for (b = bytes; b > 0; b -= sizeof(u32), i++) { - if (!arch_get_random_int(&t)) - break; - buf[i] ^= t; - } - - count -= bytes; - p += bytes; - - mix_pool_bytes(buf, bytes); - cond_resched(); - } - - return 0; -} - -static ssize_t random_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - size_t ret; - - ret = write_pool(buffer, count); - if (ret) - return ret; - - return (ssize_t)count; -} - -static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - int size, ent_count; - int __user *p = (int __user *)arg; - int retval; - - switch (cmd) { - case RNDGETENTCNT: - /* inherently racy, no point locking */ - ent_count = POOL_ENTROPY_BITS(); - if (put_user(ent_count, p)) - return -EFAULT; - return 0; - case RNDADDTOENTCNT: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ent_count, p)) - return -EFAULT; - return credit_entropy_bits_safe(ent_count); - case RNDADDENTROPY: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ent_count, p++)) - return -EFAULT; - if (ent_count < 0) - return -EINVAL; - if (get_user(size, p++)) - return -EFAULT; - retval = write_pool((const char __user *)p, size); - if (retval < 0) - return retval; - return credit_entropy_bits_safe(ent_count); - case RNDZAPENTCNT: - case RNDCLEARPOOL: - /* - * Clear the entropy pool counters. We no longer clear - * the entropy pool, as that's silly. 
- */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - return 0; - case RNDRESEEDCRNG: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (crng_init < 2) - return -ENODATA; - crng_reseed(&primary_crng, true); - WRITE_ONCE(crng_global_init_time, jiffies - 1); - return 0; - default: - return -EINVAL; - } -} - -static int random_fasync(int fd, struct file *filp, int on) -{ - return fasync_helper(fd, filp, on, &fasync); -} - -const struct file_operations random_fops = { - .read = random_read, - .write = random_write, - .poll = random_poll, - .unlocked_ioctl = random_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .fasync = random_fasync, - .llseek = noop_llseek, -}; - -const struct file_operations urandom_fops = { - .read = urandom_read, - .write = random_write, - .unlocked_ioctl = random_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .fasync = random_fasync, - .llseek = noop_llseek, -}; - -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, - flags) -{ - int ret; - - if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) - return -EINVAL; - - /* - * Requesting insecure and blocking randomness at the same time makes - * no sense. 
- */ - if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) - return -EINVAL; - - if (count > INT_MAX) - count = INT_MAX; - - if (!(flags & GRND_INSECURE) && !crng_ready()) { - if (flags & GRND_NONBLOCK) - return -EAGAIN; - ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; - } - return urandom_read_nowarn(NULL, buf, count, NULL); -} - -/******************************************************************** - * - * Sysctl interface - * - ********************************************************************/ - -#ifdef CONFIG_SYSCTL - -#include - -static int min_write_thresh; -static int max_write_thresh = POOL_BITS; -static int random_min_urandom_seed = 60; -static char sysctl_bootid[16]; - /* - * This function is used to return both the bootid UUID, and random - * UUID. The difference is in whether table->data is NULL; if it is, - * then a new UUID is generated and returned to the user. - * - * If the user accesses this via the proc interface, the UUID will be - * returned as an ASCII string in the standard UUID format; if via the - * sysctl system call, as 16 bytes of binary data. + * Batched entropy returns random integers. The quality of the random + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. 
*/ -static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - unsigned char buf[64], tmp_uuid[16], *uuid; - - uuid = table->data; - if (!uuid) { - uuid = tmp_uuid; - generate_random_uuid(uuid); - } else { - static DEFINE_SPINLOCK(bootid_spinlock); - - spin_lock(&bootid_spinlock); - if (!uuid[8]) - generate_random_uuid(uuid); - spin_unlock(&bootid_spinlock); - } - - sprintf(buf, "%pU", uuid); - - fake_table.data = buf; - fake_table.maxlen = sizeof(buf); - - return proc_dostring(&fake_table, write, buffer, lenp, ppos); -} - -/* - * Return entropy available scaled to integral bits - */ -static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - int entropy_count; - - entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT; - - fake_table.data = &entropy_count; - fake_table.maxlen = sizeof(entropy_count); - - return proc_dointvec(&fake_table, write, buffer, lenp, ppos); -} - -static int sysctl_poolsize = POOL_BITS; -static struct ctl_table random_table[] = { - { - .procname = "poolsize", - .data = &sysctl_poolsize, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "entropy_avail", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_do_entropy, - .data = &input_pool.entropy_count, - }, - { - .procname = "write_wakeup_threshold", - .data = &random_write_wakeup_bits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_write_thresh, - .extra2 = &max_write_thresh, - }, - { - .procname = "urandom_min_reseed_secs", - .data = &random_min_urandom_seed, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "boot_id", - .data = &sysctl_bootid, - .maxlen = 16, - .mode = 0444, - .proc_handler = proc_do_uuid, - }, - { - .procname = "uuid", - .maxlen = 16, - .mode = 0444, - 
.proc_handler = proc_do_uuid, - }, -#ifdef ADD_INTERRUPT_BENCH - { - .procname = "add_interrupt_avg_cycles", - .data = &avg_cycles, - .maxlen = sizeof(avg_cycles), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "add_interrupt_avg_deviation", - .data = &avg_deviation, - .maxlen = sizeof(avg_deviation), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, -#endif - { } -}; - -/* - * rand_initialize() is called before sysctl_init(), - * so we cannot call register_sysctl_init() in rand_initialize() - */ -static int __init random_sysctls_init(void) -{ - register_sysctl_init("kernel/random", random_table); - return 0; -} -device_initcall(random_sysctls_init); -#endif /* CONFIG_SYSCTL */ - struct batched_entropy { union { - u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; + /* + * We make this 1.5x a ChaCha block, so that we get the + * remaining 32 bytes from fast key erasure, plus one full + * block from the detached ChaCha state. We can increase + * the size of this later if needed so long as we keep the + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. + */ + u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; + u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; }; + local_lock_t lock; + unsigned long generation; unsigned int position; - spinlock_t batch_lock; }; -/* - * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom, but there is no backtrack protection, with - * the goal of being quite fast and not depleting entropy. In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once at any - * point prior. 
- */ + static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), + .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), + .position = UINT_MAX }; u64 get_random_u64(void) @@ -2097,68 +602,81 @@ u64 get_random_u64(void) unsigned long flags; struct batched_entropy *batch; static void *previous; + unsigned long next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u64.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u64); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { - extract_crng((u8 *)batch->entropy_u64); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); batch->position = 0; + batch->generation = next_gen; } - ret = batch->entropy_u64[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + + ret = batch->entropy_u64[batch->position]; + batch->entropy_u64[batch->position] = 0; + ++batch->position; + local_unlock_irqrestore(&batched_entropy_u64.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u64); static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), + .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), + .position = UINT_MAX }; + u32 get_random_u32(void) { u32 ret; unsigned long flags; struct batched_entropy *batch; static void *previous; + unsigned long next_gen; warn_unseeded_randomness(&previous); + local_lock_irqsave(&batched_entropy_u32.lock, flags); batch = raw_cpu_ptr(&batched_entropy_u32); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { - extract_crng((u8 *)batch->entropy_u32); + + next_gen = READ_ONCE(base_crng.generation); + if (batch->position >= 
ARRAY_SIZE(batch->entropy_u32) || + next_gen != batch->generation) { + _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); batch->position = 0; + batch->generation = next_gen; } - ret = batch->entropy_u32[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); + + ret = batch->entropy_u32[batch->position]; + batch->entropy_u32[batch->position] = 0; + ++batch->position; + local_unlock_irqrestore(&batched_entropy_u32.lock, flags); return ret; } EXPORT_SYMBOL(get_random_u32); -/* It's important to invalidate all potential batched entropy that might - * be stored before the crng is initialized, which we can do lazily by - * simply resetting the counter to zero so that it's re-extracted on the - * next usage. */ -static void invalidate_batched_entropy(void) +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int random_prepare_cpu(unsigned int cpu) { - int cpu; - unsigned long flags; - - for_each_possible_cpu(cpu) { - struct batched_entropy *batched_entropy; - - batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); - spin_lock_irqsave(&batched_entropy->batch_lock, flags); - batched_entropy->position = 0; - spin_unlock(&batched_entropy->batch_lock); - - batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); - spin_lock(&batched_entropy->batch_lock); - batched_entropy->position = 0; - spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); - } + /* + * When the cpu comes back online, immediately invalidate both + * the per-cpu crng and all batches, so that we serve fresh + * randomness. 
+ */ + per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; + per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; + return 0; } +#endif /** * randomize_page - Generate a random, page aligned address @@ -2192,15 +710,416 @@ unsigned long randomize_page(unsigned long start, unsigned long range) return start + (get_random_long() % range << PAGE_SHIFT); } -/* Interface for in-kernel drivers of true hardware RNGs. +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. + */ +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +{ + size_t left = nbytes; + u8 *p = buf; + + while (left) { + unsigned long v; + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, chunk); + p += chunk; + left -= chunk; + } + + return nbytes - left; +} +EXPORT_SYMBOL(get_random_bytes_arch); + + +/********************************************************************** + * + * Entropy accumulation and extraction routines. + * + * Callers may add entropy via: + * + * static void mix_pool_bytes(const void *in, size_t nbytes) + * + * After which, if added entropy should be credited: + * + * static void credit_entropy_bits(size_t nbits) + * + * Finally, extract entropy via these two, with the latter one + * setting the entropy count to zero and extracting only if there + * is POOL_MIN_BITS entropy credited prior or force is true: + * + * static void extract_entropy(void *buf, size_t nbytes) + * static bool drain_entropy(void *buf, size_t nbytes, bool force) + * + **********************************************************************/ + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_MIN_BITS = POOL_BITS /* No point in settling for less. 
*/ +}; + +/* For notifying userspace should write into /dev/random. */ +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); + +static struct { + struct blake2s_state hash; + spinlock_t lock; + unsigned int entropy_count; +} input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), +}; + +static void _mix_pool_bytes(const void *in, size_t nbytes) +{ + blake2s_update(&input_pool.hash, in, nbytes); +} + +/* + * This function adds bytes into the entropy "pool". It does not + * update the entropy estimate. The caller should call + * credit_entropy_bits if this is appropriate. + */ +static void mix_pool_bytes(const void *in, size_t nbytes) +{ + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(in, nbytes); + spin_unlock_irqrestore(&input_pool.lock, flags); +} + +static void credit_entropy_bits(size_t nbits) +{ + unsigned int entropy_count, orig, add; + + if (!nbits) + return; + + add = min_t(size_t, nbits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.entropy_count); + entropy_count = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); + + if (!crng_ready() && entropy_count >= POOL_MIN_BITS) + crng_reseed(false); +} + +/* + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. 
+ */ +static void extract_entropy(void *buf, size_t nbytes) +{ + unsigned long flags; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdseed[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); + } + + spin_lock_irqsave(&input_pool.lock, flags); + + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); + + /* next_key = HASHPRF(seed, RDSEED || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); + + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); + + while (nbytes) { + i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDSEED || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); + nbytes -= i; + buf += i; + } + + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); +} + +/* + * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force + * is true, and then we set the entropy count to zero (but don't actually touch + * any data). Only then can we extract a new key with extract_entropy(). 
+ */ +static bool drain_entropy(void *buf, size_t nbytes, bool force) +{ + unsigned int entropy_count; + do { + entropy_count = READ_ONCE(input_pool.entropy_count); + if (!force && entropy_count < POOL_MIN_BITS) + return false; + } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); + extract_entropy(buf, nbytes); + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + return true; +} + + +/********************************************************************** + * + * Entropy collection routines. + * + * The following exported functions are used for pushing entropy into + * the above entropy accumulation routines: + * + * void add_device_randomness(const void *buf, size_t size); + * void add_input_randomness(unsigned int type, unsigned int code, + * unsigned int value); + * void add_disk_randomness(struct gendisk *disk); + * void add_hwgenerator_randomness(const void *buffer, size_t count, + * size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t size); + * void add_vmfork_randomness(const void *unique_vm_id, size_t size); + * void add_interrupt_randomness(int irq); + * + * add_device_randomness() adds data to the input pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* credit any actual entropy to + * the pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. 
Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The above two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or + * add_device_randomness(), depending on whether or not the configuration + * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * + * add_vmfork_randomness() adds a unique (but not necessarily secret) ID + * representing the current instance of a VM to the pool, without crediting, + * and then force-reseeds the crng so that it takes effect immediately. + * + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the input pool roughly once a second or after 64 + * interrupts, crediting 1 bit of entropy for whichever comes first. + * + **********************************************************************/ + +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); + +/* + * The first collection of entropy occurs at system boot while interrupts + * are still turned off. Here we push in RDSEED, a timestamp, and utsname(). + * Depending on the above configuration knob, RDSEED may be considered + * sufficient for initialization. Note that much earlier setup may already + * have pushed entropy into the input pool by the time we get here. 
+ */ +int __init rand_initialize(void) +{ + size_t i; + ktime_t now = ktime_get_real(); + bool arch_init = true; + unsigned long rv; + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { + if (!arch_get_random_seed_long_early(&rv) && + !arch_get_random_long_early(&rv)) { + rv = random_get_entropy(); + arch_init = false; + } + _mix_pool_bytes(&rv, sizeof(rv)); + } + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(utsname(), sizeof(*(utsname()))); + + extract_entropy(base_crng.key, sizeof(base_crng.key)); + ++base_crng.generation; + + if (arch_init && trust_cpu && !crng_ready()) { + crng_init = 2; + pr_notice("crng init done (trusting CPU's manufacturer)\n"); + } + + if (ratelimit_disable) + unseeded_warning.interval = 0; + return 0; +} + +/* + * Add device- or boot-specific data to the input pool to help + * initialize it. + * + * None of this adds any entropy; it is meant to avoid the problem of + * the entropy pool having similar initial state across largely + * identical devices. + */ +void add_device_randomness(const void *buf, size_t size) +{ + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; + + if (crng_init == 0 && size) + crng_pre_init_inject(buf, size, false); + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(buf, size); + spin_unlock_irqrestore(&input_pool.lock, flags); +} +EXPORT_SYMBOL(add_device_randomness); + +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; + +/* + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. + * + * The number "num" is also added to the pool - it should somehow describe + * the type of event which just happened. 
This is currently 0-255 for + * keyboard scan codes, and 256 upwards for interrupts. + */ +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) +{ + cycles_t cycles = random_get_entropy(); + unsigned long flags, now = jiffies; + long delta, delta2, delta3; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&cycles, sizeof(cycles)); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + + /* + * Calculate number of bits of randomness we probably added. + * We take into account the first, second and third-order deltas + * in order to make our estimate. + */ + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); + + delta2 = delta - READ_ONCE(state->last_delta); + WRITE_ONCE(state->last_delta, delta); + + delta3 = delta2 - READ_ONCE(state->last_delta2); + WRITE_ONCE(state->last_delta2, delta2); + + if (delta < 0) + delta = -delta; + if (delta2 < 0) + delta2 = -delta2; + if (delta3 < 0) + delta3 = -delta3; + if (delta > delta2) + delta = delta2; + if (delta > delta3) + delta = delta3; + + /* + * delta is now minimum absolute delta. + * Round down by 1 bit on general principles, + * and limit entropy estimate to 12 bits. + */ + credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11)); +} + +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) +{ + static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; + + /* Ignore autorepeat and the like. */ + if (value == last_value) + return; + + last_value = value; + add_timer_randomness(&input_timer_state, + (type << 4) ^ code ^ (code >> 4) ^ value); +} +EXPORT_SYMBOL_GPL(add_input_randomness); + +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) +{ + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. 
*/ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); +} +EXPORT_SYMBOL_GPL(add_disk_randomness); + +void rand_initialize_disk(struct gendisk *disk) +{ + struct timer_rand_state *state; + + /* + * If kzalloc returns null, we just won't use that entropy + * source. + */ + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; + } +} +#endif + +/* + * Interface for in-kernel drivers of true hardware RNGs. * Those devices may produce endless random bits and will be throttled * when our pool is full. */ -void add_hwgenerator_randomness(const char *buffer, size_t count, +void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); + size_t ret = crng_pre_init_inject(buffer, count, true); mix_pool_bytes(buffer, ret); count -= ret; buffer += ret; @@ -2208,26 +1127,28 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, return; } - /* Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * when the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has lapsed. + /* + * Throttle writing if we're above the trickle threshold. + * We'll be woken up again once below POOL_MIN_BITS, when + * the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has elapsed. */ wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - POOL_ENTROPY_BITS() <= random_write_wakeup_bits, + input_pool.entropy_count < POOL_MIN_BITS, CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); -/* Handle random seed passed by bootloader. +/* + * Handle random seed passed by bootloader. * If the seed is trustworthy, it would be regarded as hardware RNGs. 
Otherwise * it would be regarded as device data. * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. */ -void add_bootloader_randomness(const void *buf, unsigned int size) +void add_bootloader_randomness(const void *buf, size_t size) { if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) add_hwgenerator_randomness(buf, size, size * 8); @@ -2235,3 +1156,583 @@ void add_bootloader_randomness(const void *buf, unsigned int size) add_device_randomness(buf, size); } EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +#if IS_ENABLED(CONFIG_VMGENID) +static BLOCKING_NOTIFIER_HEAD(vmfork_chain); + +/* + * Handle a new unique VM ID, which is unique, not secret, so we + * don't credit it, but we do immediately force a reseed after so + * that it's used by the crng posthaste. + */ +void add_vmfork_randomness(const void *unique_vm_id, size_t size) +{ + add_device_randomness(unique_vm_id, size); + if (crng_ready()) { + crng_reseed(true); + pr_notice("crng reseeded due to virtual machine fork\n"); + } + blocking_notifier_call_chain(&vmfork_chain, 0, NULL); +} +#if IS_MODULE(CONFIG_VMGENID) +EXPORT_SYMBOL_GPL(add_vmfork_randomness); +#endif + +int register_random_vmfork_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vmfork_chain, nb); +} +EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); + +int unregister_random_vmfork_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&vmfork_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier); +#endif + +struct fast_pool { + struct work_struct mix; + unsigned long pool[4]; + unsigned long last; + unsigned int count; + u16 reg_idx; +}; + +static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { +#ifdef CONFIG_64BIT + /* SipHash constants */ + .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL, + 0x6c7967656e657261UL, 0x7465646279746573UL } +#else + /* HalfSipHash constants */ + .pool = { 0, 0, 0x6c796765U, 0x74656462U } +#endif +}; + +/* + * This is 
[Half]SipHash-1-x, starting from an empty key. Because + * the key is fixed, it assumes that its inputs are non-malicious, + * and therefore this has no security on its own. s represents the + * 128 or 256-bit SipHash state, while v represents a 128-bit input. + */ +static void fast_mix(unsigned long s[4], const unsigned long *v) +{ + size_t i; + + for (i = 0; i < 16 / sizeof(long); ++i) { + s[3] ^= v[i]; +#ifdef CONFIG_64BIT + s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); + s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; + s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; + s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); +#else + s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); + s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; + s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; + s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); +#endif + s[0] ^= v[i]; + } +} + +#ifdef CONFIG_SMP +/* + * This function is called when the CPU has just come online, with + * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. + */ +int random_online_cpu(unsigned int cpu) +{ + /* + * During CPU shutdown and before CPU onlining, add_interrupt_ + * randomness() may schedule mix_interrupt_randomness(), and + * set the MIX_INFLIGHT flag. However, because the worker can + * be scheduled on a different CPU during this period, that + * flag will never be cleared. For that reason, we zero out + * the flag here, which runs just after workqueues are onlined + * for the CPU again. This also has the effect of setting the + * irq randomness count to zero so that new accumulated irqs + * are fresh. 
+ */ + per_cpu_ptr(&irq_randomness, cpu)->count = 0; + return 0; +} +#endif + +static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) +{ + unsigned long *ptr = (unsigned long *)regs; + unsigned int idx; + + if (regs == NULL) + return 0; + idx = READ_ONCE(f->reg_idx); + if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) + idx = 0; + ptr += idx++; + WRITE_ONCE(f->reg_idx, idx); + return *ptr; +} + +static void mix_interrupt_randomness(struct work_struct *work) +{ + struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); + /* + * The size of the copied stack pool is explicitly 16 bytes so that we + * tax mix_pool_bytes()'s compression function the same amount on all + * platforms. This means on 64-bit we copy half the pool into this, + * while on 32-bit we copy all of it. The entropy is supposed to be + * sufficiently dispersed between bits that in the sponge-like + * half case, on average we don't wind up "losing" some. + */ + u8 pool[16]; + + /* Check to see if we're running on the wrong CPU due to hotplug. */ + local_irq_disable(); + if (fast_pool != this_cpu_ptr(&irq_randomness)) { + local_irq_enable(); + return; + } + + /* + * Copy the pool to the stack so that the mixer always has a + * consistent view, before we reenable irqs again.
+ */ + memcpy(pool, fast_pool->pool, sizeof(pool)); + fast_pool->count = 0; + fast_pool->last = jiffies; + local_irq_enable(); + + if (unlikely(crng_init == 0)) { + crng_pre_init_inject(pool, sizeof(pool), true); + mix_pool_bytes(pool, sizeof(pool)); + } else { + mix_pool_bytes(pool, sizeof(pool)); + credit_entropy_bits(1); + } + + memzero_explicit(pool, sizeof(pool)); +} + +void add_interrupt_randomness(int irq) +{ + enum { MIX_INFLIGHT = 1U << 31 }; + cycles_t cycles = random_get_entropy(); + unsigned long now = jiffies; + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned int new_count; + union { + u32 u32[4]; + u64 u64[2]; + unsigned long longs[16 / sizeof(long)]; + } irq_data; + + if (cycles == 0) + cycles = get_reg(fast_pool, regs); + + if (sizeof(cycles) == 8) + irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; + else { + irq_data.u32[0] = cycles ^ irq; + irq_data.u32[1] = now; + } + + if (sizeof(unsigned long) == 8) + irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; + else { + irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; + irq_data.u32[3] = get_reg(fast_pool, regs); + } + + fast_mix(fast_pool->pool, irq_data.longs); + new_count = ++fast_pool->count; + + if (new_count & MIX_INFLIGHT) + return; + + if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || + unlikely(crng_init == 0))) + return; + + if (unlikely(!fast_pool->mix.func)) + INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); + fast_pool->count |= MIX_INFLIGHT; + queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); +} +EXPORT_SYMBOL_GPL(add_interrupt_randomness); + +/* + * Each time the timer fires, we expect that we got an unpredictable + * jump in the cycle counter. Even if the timer is running on another + * CPU, the timer activity will be touching the stack of the CPU that is + * generating entropy.
+ * + * Note that we don't re-arm the timer in the timer itself - we are + * happy to be scheduled away, since that just makes the load more + * complex, but we do not want the timer to keep ticking unless the + * entropy loop is running. + * + * So the re-arming always happens in the entropy loop itself. + */ +static void entropy_timer(struct timer_list *t) +{ + credit_entropy_bits(1); +} + +/* + * If we have an actual cycle counter, see if we can + * generate enough entropy with timing noise + */ +static void try_to_generate_entropy(void) +{ + struct { + cycles_t cycles; + struct timer_list timer; + } stack; + + stack.cycles = random_get_entropy(); + + /* Slow counter - or none. Don't even bother */ + if (stack.cycles == random_get_entropy()) + return; + + timer_setup_on_stack(&stack.timer, entropy_timer, 0); + while (!crng_ready() && !signal_pending(current)) { + if (!timer_pending(&stack.timer)) + mod_timer(&stack.timer, jiffies + 1); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); + schedule(); + stack.cycles = random_get_entropy(); + } + + del_timer_sync(&stack.timer); + destroy_timer_on_stack(&stack.timer); + mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); +} + + +/********************************************************************** + * + * Userspace reader/writer interfaces. + * + * getrandom(2) is the primary modern interface into the RNG and should + * be used in preference to anything else. + * + * Reading from /dev/random and /dev/urandom both have the same effect + * as calling getrandom(2) with flags=0. (In earlier versions, however, + * they each had different semantics.) + * + * Writing to either /dev/random or /dev/urandom adds entropy to + * the input pool but does not credit it. + * + * Polling on /dev/random or /dev/urandom indicates when the RNG + * is initialized, on the read side, and when it wants new entropy, + * on the write side. 
+ * + * Both /dev/random and /dev/urandom have the same set of ioctls for + * adding entropy, getting the entropy count, zeroing the count, and + * reseeding the crng. + * + **********************************************************************/ + +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, + flags) +{ + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) + return -EINVAL; + + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. + */ + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) + return -EINVAL; + + if (count > INT_MAX) + count = INT_MAX; + + if (!(flags & GRND_INSECURE) && !crng_ready()) { + int ret; + + if (flags & GRND_NONBLOCK) + return -EAGAIN; + ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + } + return get_random_bytes_user(buf, count); +} + +static __poll_t random_poll(struct file *file, poll_table *wait) +{ + __poll_t mask; + + poll_wait(file, &crng_init_wait, wait); + poll_wait(file, &random_write_wait, wait); + mask = 0; + if (crng_ready()) + mask |= EPOLLIN | EPOLLRDNORM; + if (input_pool.entropy_count < POOL_MIN_BITS) + mask |= EPOLLOUT | EPOLLWRNORM; + return mask; +} + +static int write_pool(const char __user *ubuf, size_t count) +{ + size_t len; + int ret = 0; + u8 block[BLAKE2S_BLOCK_SIZE]; + + while (count) { + len = min(count, sizeof(block)); + if (copy_from_user(block, ubuf, len)) { + ret = -EFAULT; + goto out; + } + count -= len; + ubuf += len; + mix_pool_bytes(block, len); + cond_resched(); + } + +out: + memzero_explicit(block, sizeof(block)); + return ret; +} + +static ssize_t random_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + int ret; + + ret = write_pool(buffer, count); + if (ret) + return ret; + + return (ssize_t)count; +} + +static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + int ret; + + ret = wait_for_random_bytes(); + 
if (ret != 0) + return ret; + return get_random_bytes_user(buf, nbytes); +} + +static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + int size, ent_count; + int __user *p = (int __user *)arg; + int retval; + + switch (cmd) { + case RNDGETENTCNT: + /* Inherently racy, no point locking. */ + if (put_user(input_pool.entropy_count, p)) + return -EFAULT; + return 0; + case RNDADDTOENTCNT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count, p)) + return -EFAULT; + if (ent_count < 0) + return -EINVAL; + credit_entropy_bits(ent_count); + return 0; + case RNDADDENTROPY: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count, p++)) + return -EFAULT; + if (ent_count < 0) + return -EINVAL; + if (get_user(size, p++)) + return -EFAULT; + retval = write_pool((const char __user *)p, size); + if (retval < 0) + return retval; + credit_entropy_bits(ent_count); + return 0; + case RNDZAPENTCNT: + case RNDCLEARPOOL: + /* + * Clear the entropy pool counters. We no longer clear + * the entropy pool, as that's silly. + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } + return 0; + case RNDRESEEDCRNG: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!crng_ready()) + return -ENODATA; + crng_reseed(false); + return 0; + default: + return -EINVAL; + } +} + +static int random_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &fasync); +} + +const struct file_operations random_fops = { + .read = random_read, + .write = random_write, + .poll = random_poll, + .unlocked_ioctl = random_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = random_fasync, + .llseek = noop_llseek, +}; + + +/******************************************************************** + * + * Sysctl interface. 
+ * + * These are partly unused legacy knobs with dummy values to not break + * userspace and partly still useful things. They are usually accessible + * in /proc/sys/kernel/random/ and are as follows: + * + * - boot_id - a UUID representing the current boot. + * + * - uuid - a random UUID, different each time the file is read. + * + * - poolsize - the number of bits of entropy that the input pool can + * hold, tied to the POOL_BITS constant. + * + * - entropy_avail - the number of bits of entropy currently in the + * input pool. Always <= poolsize. + * + * - write_wakeup_threshold - the amount of entropy in the input pool + * below which write polls to /dev/random will unblock, requesting + * more entropy, tied to the POOL_MIN_BITS constant. It is writable + * to avoid breaking old userspaces, but writing to it does not + * change any behavior of the RNG. + * + * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. + * It is writable to avoid breaking old userspaces, but writing + * to it does not change any behavior of the RNG. + * + ********************************************************************/ + +#ifdef CONFIG_SYSCTL + +#include + +static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; +static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_poolsize = POOL_BITS; +static u8 sysctl_bootid[UUID_SIZE]; + +/* + * This function is used to return both the bootid UUID, and random + * UUID. The difference is in whether table->data is NULL; if it is, + * then a new UUID is generated and returned to the user. 
+ */ +static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + u8 tmp_uuid[UUID_SIZE], *uuid; + char uuid_string[UUID_STRING_LEN + 1]; + struct ctl_table fake_table = { + .data = uuid_string, + .maxlen = UUID_STRING_LEN + }; + + if (write) + return -EPERM; + + uuid = table->data; + if (!uuid) { + uuid = tmp_uuid; + generate_random_uuid(uuid); + } else { + static DEFINE_SPINLOCK(bootid_spinlock); + + spin_lock(&bootid_spinlock); + if (!uuid[8]) + generate_random_uuid(uuid); + spin_unlock(&bootid_spinlock); + } + + snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); + return proc_dostring(&fake_table, 0, buffer, lenp, ppos); +} + +/* The same as proc_dointvec, but writes don't change anything. */ +static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos); +} + +static struct ctl_table random_table[] = { + { + .procname = "poolsize", + .data = &sysctl_poolsize, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "entropy_avail", + .data = &input_pool.entropy_count, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "write_wakeup_threshold", + .data = &sysctl_random_write_wakeup_bits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_do_rointvec, + }, + { + .procname = "urandom_min_reseed_secs", + .data = &sysctl_random_min_urandom_seed, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_do_rointvec, + }, + { + .procname = "boot_id", + .data = &sysctl_bootid, + .mode = 0444, + .proc_handler = proc_do_uuid, + }, + { + .procname = "uuid", + .mode = 0444, + .proc_handler = proc_do_uuid, + }, + { } +}; + +/* + * rand_initialize() is called before sysctl_init(), + * so we cannot call register_sysctl_init() in rand_initialize() + */ +static int __init random_sysctls_init(void) +{ + 
register_sysctl_init("kernel/random", random_table); + return 0; +} +device_initcall(random_sysctls_init); +#endif diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index a46067c38bf5..0fad1331303c 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -59,9 +59,7 @@ out: return ret; } -#ifdef CONFIG_PM_SLEEP -static int wg_pm_notification(struct notifier_block *nb, unsigned long action, - void *data) +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data) { struct wg_device *wg; struct wg_peer *peer; @@ -92,7 +90,24 @@ static int wg_pm_notification(struct notifier_block *nb, unsigned long action, } static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; -#endif + +static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data) +{ + struct wg_device *wg; + struct wg_peer *peer; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_noise_expire_current_peer_keypairs(peer); + mutex_unlock(&wg->device_update_lock); + } + rtnl_unlock(); + return 0; +} + +static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification }; static int wg_stop(struct net_device *dev) { @@ -424,15 +439,17 @@ int __init wg_device_init(void) { int ret; -#ifdef CONFIG_PM_SLEEP ret = register_pm_notifier(&pm_notifier); if (ret) return ret; -#endif + + ret = register_random_vmfork_notifier(&vm_notifier); + if (ret) + goto error_pm; ret = register_pernet_device(&pernet_ops); if (ret) - goto error_pm; + goto error_vm; ret = rtnl_link_register(&link_ops); if (ret) @@ -442,10 +459,10 @@ int __init wg_device_init(void) error_pernet: unregister_pernet_device(&pernet_ops); +error_vm: + unregister_random_vmfork_notifier(&vm_notifier); error_pm: -#ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&pm_notifier); -#endif return ret; } 
@@ -453,8 +470,7 @@ void wg_device_uninit(void) { rtnl_link_unregister(&link_ops); unregister_pernet_device(&pernet_ops); -#ifdef CONFIG_PM_SLEEP + unregister_random_vmfork_notifier(&vm_notifier); unregister_pm_notifier(&pm_notifier); -#endif rcu_barrier(); } diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 8061e8ef449f..121b9293c737 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -13,6 +13,17 @@ menuconfig VIRT_DRIVERS if VIRT_DRIVERS +config VMGENID + tristate "Virtual Machine Generation ID driver" + default y + depends on ACPI + help + Say Y here to use the hypervisor-provided Virtual Machine Generation ID + to reseed the RNG when the VM is cloned. This is highly recommended if + you intend to do any rollback / cloning / snapshotting of VMs. + + Prefer Y to M so that this protection is activated very early. + config FSL_HV_MANAGER tristate "Freescale hypervisor management driver" depends on FSL_SOC diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile index 3e272ea60cd9..108d0ffcc9aa 100644 --- a/drivers/virt/Makefile +++ b/drivers/virt/Makefile @@ -4,6 +4,7 @@ # obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o +obj-$(CONFIG_VMGENID) += vmgenid.o obj-y += vboxguest/ obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/ diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c new file mode 100644 index 000000000000..0ae1a39f2e28 --- /dev/null +++ b/drivers/virt/vmgenid.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Jason A. Donenfeld . All Rights Reserved. + * + * The "Virtual Machine Generation ID" is exposed via ACPI and changes when a + * virtual machine forks or is cloned. This driver exists for shepherding that + * information to random.c. 
+ */ + +#include +#include +#include +#include + +ACPI_MODULE_NAME("vmgenid"); + +enum { VMGENID_SIZE = 16 }; + +struct vmgenid_state { + u8 *next_id; + u8 this_id[VMGENID_SIZE]; +}; + +static int vmgenid_add(struct acpi_device *device) +{ + struct acpi_buffer parsed = { ACPI_ALLOCATE_BUFFER }; + struct vmgenid_state *state; + union acpi_object *obj; + phys_addr_t phys_addr; + acpi_status status; + int ret = 0; + + state = devm_kmalloc(&device->dev, sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + status = acpi_evaluate_object(device->handle, "ADDR", NULL, &parsed); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR")); + return -ENODEV; + } + obj = parsed.pointer; + if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 2 || + obj->package.elements[0].type != ACPI_TYPE_INTEGER || + obj->package.elements[1].type != ACPI_TYPE_INTEGER) { + ret = -EINVAL; + goto out; + } + + phys_addr = (obj->package.elements[0].integer.value << 0) | + (obj->package.elements[1].integer.value << 32); + state->next_id = devm_memremap(&device->dev, phys_addr, VMGENID_SIZE, MEMREMAP_WB); + if (IS_ERR(state->next_id)) { + ret = PTR_ERR(state->next_id); + goto out; + } + + memcpy(state->this_id, state->next_id, sizeof(state->this_id)); + add_device_randomness(state->this_id, sizeof(state->this_id)); + + device->driver_data = state; + +out: + ACPI_FREE(parsed.pointer); + return ret; +} + +static void vmgenid_notify(struct acpi_device *device, u32 event) +{ + struct vmgenid_state *state = acpi_driver_data(device); + u8 old_id[VMGENID_SIZE]; + + memcpy(old_id, state->this_id, sizeof(old_id)); + memcpy(state->this_id, state->next_id, sizeof(state->this_id)); + if (!memcmp(old_id, state->this_id, sizeof(old_id))) + return; + add_vmfork_randomness(state->this_id, sizeof(state->this_id)); +} + +static const struct acpi_device_id vmgenid_ids[] = { + { "VM_GEN_COUNTER", 0 }, + { } +}; + +static struct acpi_driver vmgenid_driver = { + .name 
= "vmgenid", + .ids = vmgenid_ids, + .owner = THIS_MODULE, + .ops = { + .add = vmgenid_add, + .notify = vmgenid_notify + } +}; + +module_acpi_driver(vmgenid_driver); + +MODULE_DEVICE_TABLE(acpi, vmgenid_ids); +MODULE_DESCRIPTION("Virtual Machine Generation ID"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2bc550ac8dc7..c7dce7883179 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,6 +100,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -241,6 +242,7 @@ enum cpuhp_state { CPUHP_AP_PERF_CSKY_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da21..aa1d4da03538 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. 
*/ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4bb71979a8fd..5da5d990ff58 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -211,7 +211,7 @@ struct css_device_id { kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 9 +#define ACPI_ID_LEN 16 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51f..c0baffe7afb1 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/linux/random.h - * - * Include file for the random number generator. - */ + #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H @@ -14,14 +10,10 @@ #include -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +extern void add_device_randomness(const void *, size_t); +extern void add_bootloader_randomness(const void *, size_t); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -36,17 +28,27 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); +#if IS_ENABLED(CONFIG_VMGENID) +extern void add_vmfork_randomness(const void *unique_vm_id, size_t size); +extern int register_random_vmfork_notifier(struct notifier_block *nb); +extern int unregister_random_vmfork_notifier(struct notifier_block *nb); +#else +static inline int 
register_random_vmfork_notifier(struct notifier_block *nb) { return 0; } +static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } +#endif -extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +extern int register_random_ready_notifier(struct notifier_block *nb); +extern int unregister_random_ready_notifier(struct notifier_block *nb); +extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE -extern const struct file_operations random_fops, urandom_fops; +extern const struct file_operations random_fops; #endif u32 get_random_u32(void); @@ -87,7 +89,7 @@ static inline unsigned long get_random_canary(void) /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. 
*/ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); @@ -158,4 +160,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +extern int random_prepare_cpu(unsigned int cpu); +extern int random_online_cpu(unsigned int cpu); +#endif + #endif /* _LINUX_RANDOM_H */ diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index a2d9aa16a5d7..000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,233 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %d caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("input pool: bytes %d caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(int bits, int entropy_count, unsigned long IP), - - TP_ARGS(bits, entropy_count, IP), - - TP_STRUCT__entry( - __field( int, bits ) - 
__field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bits = bits; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("input pool: bits %d entropy_count %d caller %pS", - __entry->bits, __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(debit_entropy, - TP_PROTO(int debit_bits), - - TP_ARGS( debit_bits), - - TP_STRUCT__entry( - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->debit_bits = debit_bits; - ), - - TP_printk("input pool: debit_bits %d", __entry->debit_bits) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %d", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field( int, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - - TP_ARGS(nbytes, entropy_count, IP), - - 
TP_STRUCT__entry( - __field( int, nbytes ) - __field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("input pool: nbytes %d entropy_count %d caller %pS", - __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - - TP_ARGS(nbytes, entropy_count, IP) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", __entry->got_bits, - __entry->pool_left, __entry->input_left) -); - -TRACE_EVENT(prandom_u32, - - TP_PROTO(unsigned int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( unsigned int, ret) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret=%u" , __entry->ret) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/cpu.c b/kernel/cpu.c index 407a2568f35e..238cba15449f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_RANDOM_PREPARE] = { + .name = "random:prepare", + .startup.single = random_prepare_cpu, + .teardown.single = NULL, + }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, @@ -1782,6 +1788,11 @@ static struct cpuhp_step 
cpuhp_hp_states[] = { .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, + [CPUHP_AP_RANDOM_ONLINE] = { + .name = "random:online", + .startup.single = random_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, diff --git a/lib/random32.c b/lib/random32.c index a57a0e18819d..976632003ec6 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -41,7 +41,6 @@ #include #include #include -#include /** * prandom_u32_state - seeded pseudo-random number generator. @@ -387,7 +386,6 @@ u32 prandom_u32(void) struct siprand_state *state = get_cpu_ptr(&net_rand_state); u32 res = siprand_u32(state); - trace_prandom_u32(res); put_cpu_ptr(&net_rand_state); return res; } @@ -553,9 +551,11 @@ static void prandom_reseed(struct timer_list *unused) * To avoid worrying about whether it's safe to delay that interrupt * long enough to seed all CPUs, just schedule an immediate timer event. */ -static void prandom_timer_start(struct random_ready_callback *unused) +static int prandom_timer_start(struct notifier_block *nb, + unsigned long action, void *data) { mod_timer(&seed_timer, jiffies); + return 0; } #ifdef CONFIG_RANDOM32_SELFTEST @@ -619,13 +619,13 @@ core_initcall(prandom32_state_selftest); */ static int __init prandom_init_late(void) { - static struct random_ready_callback random_ready = { - .func = prandom_timer_start + static struct notifier_block random_ready = { + .notifier_call = prandom_timer_start }; - int ret = add_random_ready_callback(&random_ready); + int ret = register_random_ready_notifier(&random_ready); if (ret == -EALREADY) { - prandom_timer_start(&random_ready); + prandom_timer_start(&random_ready, 0, NULL); ret = 0; } return ret; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b8129dd374c..36574a806a81 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -757,14 +757,16 @@ static void enable_ptr_key_workfn(struct work_struct *work) static 
DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); -static void fill_random_ptr_key(struct random_ready_callback *unused) +static int fill_random_ptr_key(struct notifier_block *nb, + unsigned long action, void *data) { /* This may be in an interrupt handler. */ queue_work(system_unbound_wq, &enable_ptr_key_work); + return 0; } -static struct random_ready_callback random_ready = { - .func = fill_random_ptr_key +static struct notifier_block random_ready = { + .notifier_call = fill_random_ptr_key }; static int __init initialize_ptr_random(void) @@ -778,7 +780,7 @@ static int __init initialize_ptr_random(void) return 0; } - ret = add_random_ready_callback(&random_ready); + ret = register_random_ready_notifier(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) {