In commit 9f480faec5 ("crypto: chacha20 - Fix keystream alignment for
chacha20_block()"), I had missed that chacha20_block() can be called
directly on the buffer passed to get_random_bytes(), which can have any
alignment.  So, while my commit didn't break anything, it didn't fully
solve the alignment problems.
Revert my solution and just update chacha20_block() to use
put_unaligned_le32(), so the output buffer need not be aligned.
This is simpler, and on many CPUs it's the same speed.
But, I kept the 'tmp' buffers in extract_crng_user() and
_get_random_bytes() 4-byte aligned, since that alignment is actually
needed for _crng_backtrack_protect() too.
Reported-by: Stephan Müller <smueller@chronox.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
		
	
			
		
			
				
	
	
		
			75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			75 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * ChaCha20 256-bit cipher algorithm, RFC7539
 | 
						|
 *
 | 
						|
 * Copyright (C) 2015 Martin Willi
 | 
						|
 *
 | 
						|
 * This program is free software; you can redistribute it and/or modify
 | 
						|
 * it under the terms of the GNU General Public License as published by
 | 
						|
 * the Free Software Foundation; either version 2 of the License, or
 | 
						|
 * (at your option) any later version.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/export.h>
 | 
						|
#include <linux/bitops.h>
 | 
						|
#include <linux/cryptohash.h>
 | 
						|
#include <asm/unaligned.h>
 | 
						|
#include <crypto/chacha20.h>
 | 
						|
 | 
						|
/*
 * Apply one ChaCha quarter-round, in place, to the four words of the
 * 16-word working state @x selected by indices @a, @b, @c and @d.
 */
static void chacha20_quarter_round(u32 *x, int a, int b, int c, int d)
{
	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 16);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 12);

	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 8);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 7);
}

/*
 * chacha20_block - generate one block of ChaCha20 keystream
 * @state: 16-word cipher state; read as the block input, and word 12 (the
 *         block counter in the RFC7539 layout) is incremented before return
 * @stream: output buffer that receives 16 little-endian 32-bit words; it is
 *          written with put_unaligned_le32(), so it needs no particular
 *          alignment
 */
void chacha20_block(u32 *state, u8 *stream)
{
	u32 work[16];
	u8 *out = stream;
	int round, w;

	/* Permute a private copy; the input words are needed again below. */
	for (w = 0; w < ARRAY_SIZE(work); w++)
		work[w] = state[w];

	/* 20 rounds, run as 10 double rounds (one column + one diagonal). */
	for (round = 0; round < 10; round++) {
		/*
		 * Column round.  The four quarter-rounds touch disjoint words,
		 * so they can run one after another.
		 */
		chacha20_quarter_round(work, 0, 4,  8, 12);
		chacha20_quarter_round(work, 1, 5,  9, 13);
		chacha20_quarter_round(work, 2, 6, 10, 14);
		chacha20_quarter_round(work, 3, 7, 11, 15);

		/* Diagonal round; again four disjoint quarter-rounds. */
		chacha20_quarter_round(work, 0, 5, 10, 15);
		chacha20_quarter_round(work, 1, 6, 11, 12);
		chacha20_quarter_round(work, 2, 7,  8, 13);
		chacha20_quarter_round(work, 3, 4,  9, 14);
	}

	/* Add the input state back in and emit each word as little-endian. */
	for (w = 0; w < ARRAY_SIZE(work); w++) {
		put_unaligned_le32(work[w] + state[w], out);
		out += sizeof(u32);
	}

	state[12]++;
}
 | 
						|
EXPORT_SYMBOL(chacha20_block);
 |