Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "Here is the crypto update for 4.1:

  New interfaces:
   - user-space interface for AEAD
   - user-space interface for RNG (i.e., pseudo RNG)

  New hashes:
   - ARMv8 SHA1/256
   - ARMv8 AES
   - ARMv8 GHASH
   - ARM assembler and NEON SHA256
   - MIPS OCTEON SHA1/256/512
   - MIPS img-hash SHA1/256 and MD5
   - Power 8 VMX AES/CBC/CTR/GHASH
   - PPC assembler AES, SHA1/256 and MD5
   - Broadcom IPROC RNG driver

  Cleanups/fixes:
   - prevent internal helper algos from being exposed to user-space
   - merge common code from assembly/C SHA implementations
   - misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits)
  crypto: arm - workaround for building with old binutils
  crypto: arm/sha256 - avoid sha256 code on ARMv7-M
  crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer
  crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer
  crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer
  crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer
  crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer
  crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer
  crypto: sha512-generic - move to generic glue implementation
  crypto: sha256-generic - move to generic glue implementation
  crypto: sha1-generic - move to generic glue implementation
  crypto: sha512 - implement base layer for SHA-512
  crypto: sha256 - implement base layer for SHA-256
  crypto: sha1 - implement base layer for SHA-1
  crypto: api - remove instance when test failed
  crypto: api - Move alg ref count init to crypto_check_alg
  ...
Linus Torvalds 2015-04-15 10:42:15 -07:00
commit cb906953d2
168 changed files with 18245 additions and 2224 deletions


@@ -509,6 +509,270 @@
select it due to the used type and mask field.
</para>
</sect1>
<sect1><title>Internal Structure of Kernel Crypto API</title>
<para>
The kernel crypto API has an internal structure where a cipher
implementation may use many layers and indirections. This section
shall help to clarify how the kernel crypto API uses
various components to implement the complete cipher.
</para>
<para>
The following subsections explain the internal structure based
on existing cipher implementations. The first section addresses
the most complex scenario where all other scenarios form a logical
subset.
</para>
<sect2><title>Generic AEAD Cipher Structure</title>
<para>
The following ASCII art decomposes the kernel crypto API layers
when using the AEAD cipher with the automated IV generation. The
shown example is used by the IPSEC layer.
</para>
<para>
For other use cases of AEAD ciphers, the ASCII art applies as
well, but the caller may not use the GIVCIPHER interface. In
this case, the caller must generate the IV.
</para>
<para>
The depicted example decomposes the AEAD cipher of GCM(AES) based
on the generic C implementations (gcm.c, aes-generic.c, ctr.c,
ghash-generic.c, seqiv.c). The generic implementation serves as an
example showing the complete logic of the kernel crypto API.
</para>
<para>
    Some streamlined cipher implementations (like AES-NI) merge
    aspects which, in the view of the kernel crypto API, cannot be
    decomposed into layers any more.
In case of the AES-NI implementation, the CTR mode, the GHASH
implementation and the AES cipher are all merged into one cipher
implementation registered with the kernel crypto API. In this case,
the concept described by the following ASCII art applies too. However,
the decomposition of GCM into the individual sub-components
by the kernel crypto API is not done any more.
</para>
<para>
Each block in the following ASCII art is an independent cipher
instance obtained from the kernel crypto API. Each block
is accessed by the caller or by other blocks using the API functions
defined by the kernel crypto API for the cipher implementation type.
</para>
<para>
The blocks below indicate the cipher type as well as the specific
logic implemented in the cipher.
</para>
<para>
The ASCII art picture also indicates the call structure, i.e. who
calls which component. The arrows point to the invoked block
where the caller uses the API applicable to the cipher type
specified for the block.
</para>
<programlisting>
<![CDATA[
kernel crypto API | IPSEC Layer
|
+-----------+ |
| | (1)
| givcipher | <----------------------------------- esp_output
| (seqiv) | ---+
+-----------+ |
| (2)
+-----------+ |
| | <--+ (2)
| aead | <----------------------------------- esp_input
| (gcm) | ------------+
+-----------+ |
| (3) | (5)
v v
+-----------+ +-----------+
| | | |
| ablkcipher| | ahash |
| (ctr) | ---+ | (ghash) |
+-----------+ | +-----------+
|
+-----------+ | (4)
| | <--+
| cipher |
| (aes) |
+-----------+
]]>
</programlisting>
<para>
    The following call sequence is applicable when the IPSEC layer
    triggers an encryption operation with the esp_output function. During
    configuration, the administrator has set up the use of rfc4106(gcm(aes))
    as the cipher for ESP. This call sequence is depicted in the
    ASCII art above:
</para>
<orderedlist>
<listitem>
<para>
esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
operation of the GIVCIPHER implementation.
</para>
<para>
In case of GCM, the SEQIV implementation is registered as GIVCIPHER
in crypto_rfc4106_alloc().
</para>
<para>
The SEQIV performs its operation to generate an IV where the core
function is seqiv_geniv().
</para>
</listitem>
<listitem>
<para>
Now, SEQIV uses the AEAD API function calls to invoke the associated
AEAD cipher. In our case, during the instantiation of SEQIV, the
cipher handle for GCM is provided to SEQIV. This means that SEQIV
invokes AEAD cipher operations with the GCM cipher handle.
</para>
<para>
During instantiation of the GCM handle, the CTR(AES) and GHASH
ciphers are instantiated. The cipher handles for CTR(AES) and GHASH
are retained for later use.
</para>
<para>
      The GCM implementation is responsible for invoking the CTR mode AES and
the GHASH cipher in the right manner to implement the GCM
specification.
</para>
</listitem>
<listitem>
<para>
The GCM AEAD cipher type implementation now invokes the ABLKCIPHER API
with the instantiated CTR(AES) cipher handle.
</para>
<para>
During instantiation of the CTR(AES) cipher, the CIPHER type
implementation of AES is instantiated. The cipher handle for AES is
retained.
</para>
<para>
That means that the ABLKCIPHER implementation of CTR(AES) only
implements the CTR block chaining mode. After performing the block
chaining operation, the CIPHER implementation of AES is invoked.
</para>
</listitem>
<listitem>
<para>
The ABLKCIPHER of CTR(AES) now invokes the CIPHER API with the AES
cipher handle to encrypt one block.
</para>
</listitem>
<listitem>
<para>
The GCM AEAD implementation also invokes the GHASH cipher
implementation via the AHASH API.
</para>
</listitem>
</orderedlist>
<para>
When the IPSEC layer triggers the esp_input() function, the same call
sequence is followed with the only difference that the operation starts
with step (2).
</para>
</sect2>
<sect2><title>Generic Block Cipher Structure</title>
<para>
Generic block ciphers follow the same concept as depicted with the ASCII
art picture above.
</para>
<para>
    For example, CBC(AES) is implemented with cbc.c and aes-generic.c. The
ASCII art picture above applies as well with the difference that only
step (4) is used and the ABLKCIPHER block chaining mode is CBC.
</para>
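   <para>
    For illustration, a kernel caller could use such a CBC(AES) cipher
    via the ABLKCIPHER API roughly as follows. This is a minimal sketch;
    error handling is omitted and the scatterlists are assumed to be
    set up to point to the plaintext and ciphertext buffers.
   </para>
   <programlisting>
<![CDATA[
#include <linux/crypto.h>
#include <linux/scatterlist.h>

struct crypto_ablkcipher *tfm;
struct ablkcipher_request *req;
struct scatterlist sg_src, sg_dst;
u8 key[16], iv[16];

/* resolves to cbc.c + aes-generic.c or a higher-priority
 * implementation such as AES-NI */
tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
crypto_ablkcipher_setkey(tfm, key, sizeof(key));

req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
ablkcipher_request_set_crypt(req, &sg_src, &sg_dst, 16, iv);

/* performs the CBC block chaining which in turn invokes the AES
 * CIPHER implementation -- step (4) */
crypto_ablkcipher_encrypt(req);

ablkcipher_request_free(req);
crypto_free_ablkcipher(tfm);
]]>
   </programlisting>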
</sect2>
<sect2><title>Generic Keyed Message Digest Structure</title>
<para>
Keyed message digest implementations again follow the same concept as
depicted in the ASCII art picture above.
</para>
<para>
For example, HMAC(SHA256) is implemented with hmac.c and
sha256_generic.c. The following ASCII art illustrates the
implementation:
</para>
<programlisting>
<![CDATA[
kernel crypto API | Caller
|
+-----------+ (1) |
| | <------------------ some_function
| ahash |
| (hmac) | ---+
+-----------+ |
| (2)
+-----------+ |
| | <--+
| shash |
| (sha256) |
+-----------+
]]>
</programlisting>
<para>
The following call sequence is applicable when a caller triggers
an HMAC operation:
</para>
<orderedlist>
<listitem>
<para>
The AHASH API functions are invoked by the caller. The HMAC
implementation performs its operation as needed.
</para>
<para>
During initialization of the HMAC cipher, the SHASH cipher type of
SHA256 is instantiated. The cipher handle for the SHA256 instance is
retained.
</para>
<para>
     Whenever the HMAC implementation requires a SHA256 operation, it
     uses the retained SHA256 cipher handle.
</para>
</listitem>
<listitem>
<para>
The HMAC instance now invokes the SHASH API with the SHA256
cipher handle to calculate the message digest.
</para>
</listitem>
</orderedlist>
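   <para>
    As an illustration of step (1), a kernel caller could drive the
    HMAC(SHA256) instance through the AHASH API roughly as follows.
    This is a minimal sketch; error handling and asynchronous
    completion are omitted.
   </para>
   <programlisting>
<![CDATA[
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/scatterlist.h>

struct crypto_ahash *tfm;
struct ahash_request *req;
struct scatterlist sg;
u8 key[32], data[64], digest[SHA256_DIGEST_SIZE];

/* instantiates hmac.c which in turn instantiates the SHASH of
 * sha256_generic.c */
tfm = crypto_alloc_ahash("hmac(sha256)", 0, 0);
crypto_ahash_setkey(tfm, key, sizeof(key));

req = ahash_request_alloc(tfm, GFP_KERNEL);
sg_init_one(&sg, data, sizeof(data));
ahash_request_set_crypt(req, &sg, digest, sizeof(data));

/* step (1) -- the HMAC implementation internally performs
 * step (2) via the retained SHA256 cipher handle */
crypto_ahash_digest(req);

ahash_request_free(req);
crypto_free_ahash(tfm);
]]>
   </programlisting>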
</sect2>
</sect1>
</chapter>
<chapter id="Development"><title>Developing Cipher Algorithms</title>
@@ -808,6 +1072,602 @@
</sect1>
</chapter>
<chapter id="User"><title>User Space Interface</title>
<sect1><title>Introduction</title>
<para>
    The concepts of the kernel crypto API visible to kernel space are fully
applicable to the user space interface as well. Therefore, the kernel
crypto API high level discussion for the in-kernel use cases applies
here as well.
</para>
<para>
The major difference, however, is that user space can only act as a
consumer and never as a provider of a transformation or cipher algorithm.
</para>
<para>
The following covers the user space interface exported by the kernel
crypto API. A working example of this description is libkcapi that
can be obtained from [1]. That library can be used by user space
applications that require cryptographic services from the kernel.
</para>
<para>
    Some aspects of the in-kernel crypto API do not apply to user
    space, however. This includes the difference between
synchronous and asynchronous invocations. The user space API call
is fully synchronous.
</para>
<para>
[1] http://www.chronox.de/libkcapi.html
</para>
</sect1>
<sect1><title>User Space API General Remarks</title>
<para>
The kernel crypto API is accessible from user space. Currently,
the following ciphers are accessible:
</para>
<itemizedlist>
<listitem>
<para>Message digest including keyed message digest (HMAC, CMAC)</para>
</listitem>
<listitem>
<para>Symmetric ciphers</para>
</listitem>
<listitem>
<para>AEAD ciphers</para>
</listitem>
<listitem>
<para>Random Number Generators</para>
</listitem>
</itemizedlist>
<para>
    The interface is provided via sockets using the AF_ALG address family.
In addition, the setsockopt option type is SOL_ALG. In case the
user space header files do not export these flags yet, use the
following macros:
</para>
<programlisting>
#ifndef AF_ALG
#define AF_ALG 38
#endif
#ifndef SOL_ALG
#define SOL_ALG 279
#endif
</programlisting>
<para>
    A cipher is accessed with the same name as used for the in-kernel
API calls. This includes the generic vs. unique naming schema for
ciphers as well as the enforcement of priorities for generic names.
</para>
<para>
To interact with the kernel crypto API, a socket must be
created by the user space application. User space invokes the cipher
operation with the send()/write() system call family. The result of the
cipher operation is obtained with the read()/recv() system call family.
</para>
<para>
The following API calls assume that the socket descriptor
    is already opened by the user space application and discuss only
the kernel crypto API specific invocations.
</para>
<para>
To initialize the socket interface, the following sequence has to
be performed by the consumer:
</para>
<orderedlist>
<listitem>
<para>
Create a socket of type AF_ALG with the struct sockaddr_alg
parameter specified below for the different cipher types.
</para>
</listitem>
<listitem>
<para>
      Invoke bind with the socket descriptor.
</para>
</listitem>
<listitem>
<para>
Invoke accept with the socket descriptor. The accept system call
returns a new file descriptor that is to be used to interact with
the particular cipher instance. When invoking send/write or recv/read
system calls to send data to the kernel or obtain data from the
kernel, the file descriptor returned by accept must be used.
</para>
</listitem>
</orderedlist>
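   <para>
    The following minimal sketch illustrates these three steps for a
    message digest socket. Error handling is omitted, and the AF_ALG /
    SOL_ALG fallback macros given above are assumed where the system
    headers do not provide them.
   </para>
   <programlisting>
<![CDATA[
#include <sys/socket.h>
#include <linux/if_alg.h>
#include <unistd.h>

struct sockaddr_alg sa = {
	.salg_family = AF_ALG,
	.salg_type = "hash",
	.salg_name = "sha1"
};
int tfmfd, opfd;

tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);       /* step 1 */
bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)); /* step 2 */
opfd = accept(tfmfd, NULL, 0);                   /* step 3 */
/* opfd is used for all subsequent send/recv invocations */
]]>
   </programlisting>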
</sect1>
<sect1><title>In-place Cipher operation</title>
<para>
Just like the in-kernel operation of the kernel crypto API, the user
space interface allows the cipher operation in-place. That means that
the input buffer used for the send/write system call and the output
buffer used by the read/recv system call may be one and the same.
This is of particular interest for symmetric cipher operations where a
copying of the output data to its final destination can be avoided.
</para>
<para>
If a consumer on the other hand wants to maintain the plaintext and
the ciphertext in different memory locations, all a consumer needs
to do is to provide different memory pointers for the encryption and
decryption operation.
</para>
</sect1>
<sect1><title>Message Digest API</title>
<para>
The message digest type to be used for the cipher operation is
selected when invoking the bind syscall. bind requires the caller
to provide a filled struct sockaddr data structure. This data
structure must be filled as follows:
</para>
<programlisting>
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "hash", /* this selects the hash logic in the kernel */
.salg_name = "sha1" /* this is the cipher name */
};
</programlisting>
<para>
The salg_type value "hash" applies to message digests and keyed
    message digests. However, a keyed message digest is selected via
    the appropriate salg_name. Please see the setsockopt interface
    below for an explanation of how the key can be set for a keyed
    message digest.
</para>
<para>
Using the send() system call, the application provides the data that
should be processed with the message digest. The send system call
allows the following flags to be specified:
</para>
<itemizedlist>
<listitem>
<para>
MSG_MORE: If this flag is set, the send system call acts like a
message digest update function where the final hash is not
yet calculated. If the flag is not set, the send system call
calculates the final message digest immediately.
</para>
</listitem>
</itemizedlist>
<para>
With the recv() system call, the application can read the message
digest from the kernel crypto API. If the buffer is too small for the
message digest, the flag MSG_TRUNC is set by the kernel.
</para>
<para>
In order to set a message digest key, the calling application must use
    the setsockopt() option of ALG_SET_KEY. If the key is not set, the HMAC
operation is performed without the initial HMAC state change caused by
the key.
</para>
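   <para>
    Putting these calls together, a SHA-1 digest can be computed as
    follows. This sketch reuses the opfd descriptor obtained with the
    accept call shown earlier; error handling is omitted.
   </para>
   <programlisting>
char buf[] = "some input data";
unsigned char digest[20]; /* SHA-1 yields a 20 byte digest */

/* MSG_MORE is not set: this send finalizes the digest */
send(opfd, buf, sizeof(buf) - 1, 0);

/* retrieve the message digest */
read(opfd, digest, sizeof(digest));
   </programlisting>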
</sect1>
<sect1><title>Symmetric Cipher API</title>
<para>
The operation is very similar to the message digest discussion.
During initialization, the struct sockaddr data structure must be
filled as follows:
</para>
<programlisting>
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "skcipher", /* this selects the symmetric cipher */
.salg_name = "cbc(aes)" /* this is the cipher name */
};
</programlisting>
<para>
Before data can be sent to the kernel using the write/send system
call family, the consumer must set the key. The key setting is
described with the setsockopt invocation below.
</para>
<para>
    Using the sendmsg() system call, the application provides the data
    that should be processed for encryption or decryption. In addition,
    the IV is specified with the data structure provided by the sendmsg()
    system call.
</para>
<para>
    The cipher operation parameters are communicated as ancillary data:
    struct cmsghdr instances embedded into the struct msghdr parameter
    of the sendmsg system call. See recv(2) and cmsg(3) for more
    information on how the cmsghdr data structure is used together with
    the send/recv system call family. That ancillary data holds the
    following information, each specified with a separate header
    instance:
</para>
<itemizedlist>
<listitem>
<para>
specification of the cipher operation type with one of these flags:
</para>
<itemizedlist>
<listitem>
<para>ALG_OP_ENCRYPT - encryption of data</para>
</listitem>
<listitem>
<para>ALG_OP_DECRYPT - decryption of data</para>
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>
specification of the IV information marked with the flag ALG_SET_IV
</para>
</listitem>
</itemizedlist>
<para>
The send system call family allows the following flag to be specified:
</para>
<itemizedlist>
<listitem>
<para>
MSG_MORE: If this flag is set, the send system call acts like a
cipher update function where more input data is expected
with a subsequent invocation of the send system call.
</para>
</listitem>
</itemizedlist>
<para>
Note: The kernel reports -EINVAL for any unexpected data. The caller
must make sure that all data matches the constraints given in
/proc/crypto for the selected cipher.
</para>
<para>
With the recv() system call, the application can read the result of
the cipher operation from the kernel crypto API. The output buffer
    must be at least large enough to hold all blocks of the encrypted or
    decrypted data. If the output buffer is smaller, only as many
    blocks are returned as fit into it.
</para>
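   <para>
    The following sketch shows one way to assemble the ancillary data
    for an encryption operation of one AES block. It assumes that the
    key was already set via setsockopt as described below and that opfd
    is the descriptor returned by accept; error handling is omitted.
   </para>
   <programlisting>
<![CDATA[
#include <sys/socket.h>
#include <linux/if_alg.h>
#include <string.h>

char pt[16], ct[16], iv[16]; /* plaintext, ciphertext, IV */
struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
struct msghdr msg = { 0 };
struct cmsghdr *cmsg;
struct af_alg_iv *alg_iv;
char cbuf[CMSG_SPACE(sizeof(__u32)) +
	  CMSG_SPACE(sizeof(*alg_iv) + sizeof(iv))] = { 0 };

msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);

/* first header: the operation type */
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_ALG;
cmsg->cmsg_type = ALG_SET_OP;
cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

/* second header: the IV, prefixed with its length */
cmsg = CMSG_NXTHDR(&msg, cmsg);
cmsg->cmsg_level = SOL_ALG;
cmsg->cmsg_type = ALG_SET_IV;
cmsg->cmsg_len = CMSG_LEN(sizeof(*alg_iv) + sizeof(iv));
alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
alg_iv->ivlen = sizeof(iv);
memcpy(alg_iv->iv, iv, sizeof(iv));

msg.msg_iov = &iov;
msg.msg_iovlen = 1;

sendmsg(opfd, &msg, 0);     /* submit plaintext and parameters */
read(opfd, ct, sizeof(ct)); /* obtain the ciphertext */
]]>
   </programlisting>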
</sect1>
<sect1><title>AEAD Cipher API</title>
<para>
The operation is very similar to the symmetric cipher discussion.
During initialization, the struct sockaddr data structure must be
filled as follows:
</para>
<programlisting>
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "aead", /* this selects the symmetric cipher */
.salg_name = "gcm(aes)" /* this is the cipher name */
};
</programlisting>
<para>
Before data can be sent to the kernel using the write/send system
call family, the consumer must set the key. The key setting is
described with the setsockopt invocation below.
</para>
<para>
In addition, before data can be sent to the kernel using the
write/send system call family, the consumer must set the authentication
tag size. To set the authentication tag size, the caller must use the
setsockopt invocation described below.
</para>
<para>
    Using the sendmsg() system call, the application provides the data
    that should be processed for encryption or decryption. In addition,
    the IV is specified with the data structure provided by the sendmsg()
    system call.
</para>
<para>
    The cipher operation parameters are communicated as ancillary data:
    struct cmsghdr instances embedded into the struct msghdr parameter
    of the sendmsg system call. See recv(2) and cmsg(3) for more
    information on how the cmsghdr data structure is used together with
    the send/recv system call family. That ancillary data holds the
    following information, each specified with a separate header
    instance:
</para>
<itemizedlist>
<listitem>
<para>
specification of the cipher operation type with one of these flags:
</para>
<itemizedlist>
<listitem>
<para>ALG_OP_ENCRYPT - encryption of data</para>
</listitem>
<listitem>
<para>ALG_OP_DECRYPT - decryption of data</para>
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>
specification of the IV information marked with the flag ALG_SET_IV
</para>
</listitem>
<listitem>
<para>
specification of the associated authentication data (AAD) with the
flag ALG_SET_AEAD_ASSOCLEN. The AAD is sent to the kernel together
with the plaintext / ciphertext. See below for the memory structure.
</para>
</listitem>
</itemizedlist>
<para>
The send system call family allows the following flag to be specified:
</para>
<itemizedlist>
<listitem>
<para>
MSG_MORE: If this flag is set, the send system call acts like a
cipher update function where more input data is expected
with a subsequent invocation of the send system call.
</para>
</listitem>
</itemizedlist>
<para>
Note: The kernel reports -EINVAL for any unexpected data. The caller
must make sure that all data matches the constraints given in
/proc/crypto for the selected cipher.
</para>
<para>
With the recv() system call, the application can read the result of
the cipher operation from the kernel crypto API. The output buffer
must be at least as large as defined with the memory structure below.
If the output data size is smaller, the cipher operation is not performed.
</para>
<para>
The authenticated decryption operation may indicate an integrity error.
    Such an integrity violation is indicated with the -EBADMSG error code.
</para>
<sect2><title>AEAD Memory Structure</title>
<para>
The AEAD cipher operates with the following information that
is communicated between user and kernel space as one data stream:
</para>
<itemizedlist>
<listitem>
<para>plaintext or ciphertext</para>
</listitem>
<listitem>
<para>associated authentication data (AAD)</para>
</listitem>
<listitem>
<para>authentication tag</para>
</listitem>
</itemizedlist>
<para>
The sizes of the AAD and the authentication tag are provided with
the sendmsg and setsockopt calls (see there). As the kernel knows
the size of the entire data stream, the kernel is now able to
calculate the right offsets of the data components in the data
stream.
</para>
<para>
The user space caller must arrange the aforementioned information
in the following order:
</para>
<itemizedlist>
<listitem>
<para>
AEAD encryption input: AAD || plaintext
</para>
</listitem>
<listitem>
<para>
AEAD decryption input: AAD || ciphertext || authentication tag
</para>
</listitem>
</itemizedlist>
<para>
     The output buffer the user space caller provides must be at least
     large enough to hold the following data:
</para>
<itemizedlist>
<listitem>
<para>
AEAD encryption output: ciphertext || authentication tag
</para>
</listitem>
<listitem>
<para>
AEAD decryption output: plaintext
</para>
</listitem>
</itemizedlist>
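    <para>
     As a numeric sketch: for gcm(aes) with 16 bytes of AAD, 64 bytes of
     plaintext and a 16 byte authentication tag, the buffers are sized
     as follows:
    </para>
    <programlisting>
size_t aadlen = 16, ptlen = 64, taglen = 16;

size_t enc_inlen  = aadlen + ptlen;          /* AAD || PT        = 80 */
size_t enc_outlen = ptlen + taglen;          /* CT || tag        = 80 */
size_t dec_inlen  = aadlen + ptlen + taglen; /* AAD || CT || tag = 96 */
size_t dec_outlen = ptlen;                   /* PT               = 64 */
    </programlisting>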
</sect2>
</sect1>
<sect1><title>Random Number Generator API</title>
<para>
Again, the operation is very similar to the other APIs.
During initialization, the struct sockaddr data structure must be
filled as follows:
</para>
<programlisting>
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "rng", /* this selects the symmetric cipher */
.salg_name = "drbg_nopr_sha256" /* this is the cipher name */
};
</programlisting>
<para>
Depending on the RNG type, the RNG must be seeded. The seed is provided
using the setsockopt interface to set the key. For example, the
ansi_cprng requires a seed. The DRBGs do not require a seed, but
may be seeded.
</para>
<para>
Using the read()/recvmsg() system calls, random numbers can be obtained.
The kernel generates at most 128 bytes in one call. If user space
requires more data, multiple calls to read()/recvmsg() must be made.
</para>
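   <para>
    For example, 128 bytes of random data can be gathered with the
    following sketch, where opfd again is the descriptor returned by
    accept:
   </para>
   <programlisting>
unsigned char rnd[128];

/* at most 128 bytes are generated per call */
read(opfd, rnd, sizeof(rnd));
   </programlisting>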
<para>
WARNING: The user space caller may invoke the initially mentioned
accept system call multiple times. In this case, the returned file
descriptors have the same state.
</para>
</sect1>
<sect1><title>Zero-Copy Interface</title>
<para>
    In addition to the send/write/read/recv system call family, the AF_ALG
interface can be accessed with the zero-copy interface of splice/vmsplice.
As the name indicates, the kernel tries to avoid a copy operation into
kernel space.
</para>
<para>
The zero-copy operation requires data to be aligned at the page boundary.
Non-aligned data can be used as well, but may require more operations of
the kernel which would defeat the speed gains obtained from the zero-copy
interface.
</para>
<para>
    The system-inherent limit for the size of one zero-copy operation is
16 pages. If more data is to be sent to AF_ALG, user space must slice
the input into segments with a maximum size of 16 pages.
</para>
<para>
Zero-copy can be used with the following code example (a complete working
example is provided with libkcapi):
</para>
<programlisting>
int pipes[2];
pipe(pipes);
/* input data in iov */
vmsplice(pipes[1], iov, iovlen, SPLICE_F_GIFT);
/* opfd is the file descriptor returned from accept() system call */
splice(pipes[0], NULL, opfd, NULL, ret, 0);
read(opfd, out, outlen);
</programlisting>
</sect1>
<sect1><title>Setsockopt Interface</title>
<para>
    In addition to sending and retrieving the data subject to the cipher
    operation via the read/recv and send/write system calls, a consumer
    also needs to set additional information for the cipher operation.
    This additional information is set using the setsockopt system call,
    which must be invoked with the socket file descriptor that was bound
    to the cipher with the bind system call.
</para>
<para>
Each setsockopt invocation must use the level SOL_ALG.
</para>
<para>
The setsockopt interface allows setting the following data using
the mentioned optname:
</para>
<itemizedlist>
<listitem>
<para>
ALG_SET_KEY -- Setting the key. Key setting is applicable to:
</para>
<itemizedlist>
<listitem>
<para>the skcipher cipher type (symmetric ciphers)</para>
</listitem>
<listitem>
<para>the hash cipher type (keyed message digests)</para>
</listitem>
<listitem>
<para>the AEAD cipher type</para>
</listitem>
<listitem>
<para>the RNG cipher type to provide the seed</para>
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>
ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size
      for AEAD ciphers. For an encryption operation, the authentication
tag of the given size will be generated. For a decryption operation,
the provided ciphertext is assumed to contain an authentication tag
of the given size (see section about AEAD memory layout below).
</para>
</listitem>
</itemizedlist>
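   <para>
    For example, the key and the AEAD authentication tag size can be set
    as follows. This sketch assumes tfmfd is the socket descriptor
    created with socket() and bound with bind(); the actual cipher
    operations then use the descriptor returned by accept.
   </para>
   <programlisting>
unsigned char key[16]; /* filled with the cipher key */

setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));

/* AEAD only: request a 16 byte authentication tag; the tag length
   is passed via the option length, the option value is unused */
setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, 16);
   </programlisting>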
</sect1>
<sect1><title>User space API example</title>
<para>
Please see [1] for libkcapi which provides an easy-to-use wrapper
    around the aforementioned AF_ALG kernel interface. [1] also contains
a test application that invokes all libkcapi API calls.
</para>
<para>
[1] http://www.chronox.de/libkcapi.html
</para>
</sect1>
</chapter>
<chapter id="API"><title>Programming Interface</title>
<sect1><title>Block Cipher Context Data Structures</title>
!Pinclude/linux/crypto.h Block Cipher Context Data Structures


@@ -1,205 +0,0 @@
Introduction
============
The concepts of the kernel crypto API visible to kernel space is fully
applicable to the user space interface as well. Therefore, the kernel crypto API
high level discussion for the in-kernel use cases applies here as well.
The major difference, however, is that user space can only act as a consumer
and never as a provider of a transformation or cipher algorithm.
The following covers the user space interface exported by the kernel crypto
API. A working example of this description is libkcapi that can be obtained from
[1]. That library can be used by user space applications that require
cryptographic services from the kernel.
Some details of the in-kernel kernel crypto API aspects do not
apply to user space, however. This includes the difference between synchronous
and asynchronous invocations. The user space API call is fully synchronous.
In addition, only a subset of all cipher types are available as documented
below.
User space API general remarks
==============================
The kernel crypto API is accessible from user space. Currently, the following
ciphers are accessible:
* Message digest including keyed message digest (HMAC, CMAC)
* Symmetric ciphers
Note, AEAD ciphers are currently not supported via the symmetric cipher
interface.
The interface is provided via Netlink using the type AF_ALG. In addition, the
setsockopt option type is SOL_ALG. In case the user space header files do not
export these flags yet, use the following macros:
#ifndef AF_ALG
#define AF_ALG 38
#endif
#ifndef SOL_ALG
#define SOL_ALG 279
#endif
A cipher is accessed with the same name as done for the in-kernel API calls.
This includes the generic vs. unique naming schema for ciphers as well as the
enforcement of priorities for generic names.
To interact with the kernel crypto API, a Netlink socket must be created by
the user space application. User space invokes the cipher operation with the
send/write system call family. The result of the cipher operation is obtained
with the read/recv system call family.
The following API calls assume that the Netlink socket descriptor is already
opened by the user space application and discusses only the kernel crypto API
specific invocations.
To initialize a Netlink interface, the following sequence has to be performed
by the consumer:
1. Create a socket of type AF_ALG with the struct sockaddr_alg parameter
specified below for the different cipher types.
2. Invoke bind with the socket descriptor
3. Invoke accept with the socket descriptor. The accept system call
returns a new file descriptor that is to be used to interact with
the particular cipher instance. When invoking send/write or recv/read
system calls to send data to the kernel or obtain data from the
kernel, the file descriptor returned by accept must be used.
In-place cipher operation
=========================
Just like the in-kernel operation of the kernel crypto API, the user space
interface allows the cipher operation in-place. That means that the input buffer
used for the send/write system call and the output buffer used by the read/recv
system call may be one and the same. This is of particular interest for
symmetric cipher operations where a copying of the output data to its final
destination can be avoided.
If a consumer on the other hand wants to maintain the plaintext and the
ciphertext in different memory locations, all a consumer needs to do is to
provide different memory pointers for the encryption and decryption operation.
Message digest API
==================
The message digest type to be used for the cipher operation is selected when
invoking the bind syscall. bind requires the caller to provide a filled
struct sockaddr data structure. This data structure must be filled as follows:
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "hash", /* this selects the hash logic in the kernel */
.salg_name = "sha1" /* this is the cipher name */
};
The salg_type value "hash" applies to message digests and keyed message digests.
Though, a keyed message digest is referenced by the appropriate salg_name.
Please see below for the setsockopt interface that explains how the key can be
set for a keyed message digest.
Using the send() system call, the application provides the data that should be
processed with the message digest. The send system call allows the following
flags to be specified:
* MSG_MORE: If this flag is set, the send system call acts like a
message digest update function where the final hash is not
yet calculated. If the flag is not set, the send system call
calculates the final message digest immediately.
With the recv() system call, the application can read the message digest from
the kernel crypto API. If the buffer is too small for the message digest, the
flag MSG_TRUNC is set by the kernel.
In order to set a message digest key, the calling application must use the
setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC operation is
performed without the initial HMAC state change caused by the key.
Symmetric cipher API
====================
The operation is very similar to the message digest discussion. During
initialization, the struct sockaddr data structure must be filled as follows:
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
.salg_type = "skcipher", /* this selects the symmetric cipher */
.salg_name = "cbc(aes)" /* this is the cipher name */
};
Before data can be sent to the kernel using the write/send system call family,
the consumer must set the key. The key setting is described with the setsockopt
invocation below.
Using the sendmsg() system call, the application provides the data that should
be processed for encryption or decryption. In addition, the IV is specified
with the data structure provided by the sendmsg() system call.
The sendmsg system call parameter of struct msghdr is embedded into the
struct cmsghdr data structure. See recv(2) and cmsg(3) for more information
on how the cmsghdr data structure is used together with the send/recv system
call family. That cmsghdr data structure holds the following information
specified with a separate header instances:
* specification of the cipher operation type with one of these flags:
ALG_OP_ENCRYPT - encryption of data
ALG_OP_DECRYPT - decryption of data
* specification of the IV information marked with the flag ALG_SET_IV
The send system call family allows the following flag to be specified:
* MSG_MORE: If this flag is set, the send system call acts like a
cipher update function where more input data is expected
with a subsequent invocation of the send system call.
Note: The kernel reports -EINVAL for any unexpected data. The caller must
make sure that all data matches the constraints given in /proc/crypto for the
selected cipher.
With the recv() system call, the application can read the result of the
cipher operation from the kernel crypto API. The output buffer must be at least
as large as to hold all blocks of the encrypted or decrypted data. If the output
data size is smaller, only as many blocks are returned that fit into that
output buffer size.
Setsockopt interface
====================
In addition to the read/recv and send/write system call handling to send and
retrieve data subject to the cipher operation, a consumer also needs to set
the additional information for the cipher operation. This additional information
is set using the setsockopt system call that must be invoked with the file
descriptor of the open cipher (i.e. the file descriptor returned by the
accept system call).
Each setsockopt invocation must use the level SOL_ALG.
The setsockopt interface allows setting the following data using the mentioned
optname:
* ALG_SET_KEY -- Setting the key. Key setting is applicable to:
- the skcipher cipher type (symmetric ciphers)
- the hash cipher type (keyed message digests)
User space API example
======================
Please see [1] for libkcapi which provides an easy-to-use wrapper around the
aforementioned Netlink kernel interface. [1] also contains a test application
that invokes all libkcapi API calls.
[1] http://www.chronox.de/libkcapi.html
Author
======
Stephan Mueller <smueller@chronox.de>


@@ -0,0 +1,27 @@
Imagination Technologies hardware hash accelerator
The hash accelerator provides hardware hashing acceleration for
SHA1, SHA224, SHA256 and MD5 hashes
Required properties:
- compatible : "img,hash-accelerator"
- reg : Offset and length of the register set for the module, and the DMA port
- interrupts : The designated IRQ line for the hashing module.
- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
- dma-names : Should be "tx"
- clocks : Clock specifiers
- clock-names : "sys" Used to clock the hash block registers
"hash" Used to clock data through the accelerator
Example:
hash: hash@18149600 {
compatible = "img,hash-accelerator";
reg = <0x18149600 0x100>, <0x18101100 0x4>;
interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
dmas = <&dma 8 0xffffffff 0>;
dma-names = "tx";
clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
clock-names = "sys", "hash";
};


@@ -0,0 +1,12 @@
HWRNG support for the iproc-rng200 driver
Required properties:
- compatible : "brcm,iproc-rng200"
- reg : base address and size of control register block
Example:
rng {
compatible = "brcm,iproc-rng200";
reg = <0x18032000 0x28>;
};


@@ -2825,6 +2825,7 @@ L: linux-crypto@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
S: Maintained
F: Documentation/crypto/
F: Documentation/DocBook/crypto-API.tmpl
F: arch/*/crypto/
F: crypto/
F: drivers/crypto/


@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
if CRYPTO
source "arch/arm/crypto/Kconfig"
endif
source "lib/Kconfig"

arch/arm/crypto/Kconfig

@@ -0,0 +1,130 @@
menuconfig ARM_CRYPTO
bool "ARM Accelerated Cryptographic Algorithms"
depends on ARM
help
Say Y here to choose from a selection of cryptographic algorithms
implemented using ARM specific CPU features or instructions.
if ARM_CRYPTO
config CRYPTO_SHA1_ARM
tristate "SHA1 digest algorithm (ARM-asm)"
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM assembler.
config CRYPTO_SHA1_ARM_NEON
tristate "SHA1 digest algorithm (ARM NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM NEON assembly, when NEON instructions are
available.
config CRYPTO_SHA1_ARM_CE
tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using special ARMv8 Crypto Extensions.
config CRYPTO_SHA2_ARM_CE
tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA256_ARM
select CRYPTO_HASH
help
SHA-256 secure hash standard (DFIPS 180-2) implemented
using special ARMv8 Crypto Extensions.
config CRYPTO_SHA256_ARM
tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
select CRYPTO_HASH
depends on !CPU_V7M
help
SHA-256 secure hash standard (DFIPS 180-2) implemented
using optimized ARM assembler and NEON, when available.
config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
using ARM NEON instructions, when available.
This version of SHA implements a 512 bit hash with 256 bits of
security against collision attacks.
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
config CRYPTO_AES_ARM
tristate "AES cipher algorithms (ARM-asm)"
depends on ARM
select CRYPTO_ALGAPI
select CRYPTO_AES
help
Use optimized AES assembler routines for ARM platforms.
AES cipher algorithms (FIPS-197). AES uses the Rijndael
algorithm.
Rijndael appears to be consistently a very good performer in
both hardware and software across a wide range of computing
environments regardless of its use in feedback or non-feedback
modes. Its key setup time is excellent, and its key agility is
good. Rijndael's very low memory requirements make it very well
suited for restricted-space environments, in which it also
demonstrates excellent performance. Rijndael's operations are
among the easiest to defend against power and timing attacks.
The AES specifies three key sizes: 128, 192 and 256 bits
See <http://csrc.nist.gov/encryption/aes/> for more information.
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM
select CRYPTO_ABLK_HELPER
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes
Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
and for XTS mode encryption, CBC and XTS mode decryption speedup is
around 25%. (CBC encryption speed is not affected by this driver.)
This implementation does not rely on any lookup tables so it is
believed to be invulnerable to cache timing attacks.
config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_ABLK_HELPER
help
Use an implementation of AES in CBC, CTR and XTS modes that uses
ARMv8 Crypto Extensions
config CRYPTO_GHASH_ARM_CE
tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
help
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
that is part of the ARMv8 Crypto Extensions
endif


@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
ifneq ($(ce-obj-y)$(ce-obj-m),)
ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
obj-y += $(ce-obj-y)
obj-m += $(ce-obj-m)
else
$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
$(warning $(ce-obj-y) $(ce-obj-m))
endif
endif
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S


@@ -0,0 +1,518 @@
/*
* aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.fpu crypto-neon-fp-armv8
.align 3
.macro enc_round, state, key
aese.8 \state, \key
aesmc.8 \state, \state
.endm
.macro dec_round, state, key
aesd.8 \state, \key
aesimc.8 \state, \state
.endm
.macro enc_dround, key1, key2
enc_round q0, \key1
enc_round q0, \key2
.endm
.macro dec_dround, key1, key2
dec_round q0, \key1
dec_round q0, \key2
.endm
.macro enc_fround, key1, key2, key3
enc_round q0, \key1
aese.8 q0, \key2
veor q0, q0, \key3
.endm
.macro dec_fround, key1, key2, key3
dec_round q0, \key1
aesd.8 q0, \key2
veor q0, q0, \key3
.endm
.macro enc_dround_3x, key1, key2
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
enc_round q0, \key2
enc_round q1, \key2
enc_round q2, \key2
.endm
.macro dec_dround_3x, key1, key2
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
dec_round q0, \key2
dec_round q1, \key2
dec_round q2, \key2
.endm
.macro enc_fround_3x, key1, key2, key3
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
aese.8 q0, \key2
aese.8 q1, \key2
aese.8 q2, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
.endm
.macro dec_fround_3x, key1, key2, key3
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
aesd.8 q0, \key2
aesd.8 q1, \key2
aesd.8 q2, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
.endm
.macro do_block, dround, fround
cmp r3, #12 @ which key size?
vld1.8 {q10-q11}, [ip]!
\dround q8, q9
vld1.8 {q12-q13}, [ip]!
\dround q10, q11
vld1.8 {q10-q11}, [ip]!
\dround q12, q13
vld1.8 {q12-q13}, [ip]!
\dround q10, q11
blo 0f @ AES-128: 10 rounds
vld1.8 {q10-q11}, [ip]!
beq 1f @ AES-192: 12 rounds
\dround q12, q13
vld1.8 {q12-q13}, [ip]
\dround q10, q11
0: \fround q12, q13, q14
bx lr
1: \dround q12, q13
\fround q10, q11, q14
bx lr
.endm
/*
* Internal, non-AAPCS compliant functions that implement the core AES
* transforms. These should preserve all registers except q0 - q2 and ip
* Arguments:
* q0 : first in/output block
* q1 : second in/output block (_3x version only)
* q2 : third in/output block (_3x version only)
* q8 : first round key
* q9 : second round key
* ip : address of 3rd round key
* q14 : final round key
* r3 : number of rounds
*/
.align 6
aes_encrypt:
add ip, r2, #32 @ 3rd round key
.Laes_encrypt_tweak:
do_block enc_dround, enc_fround
ENDPROC(aes_encrypt)
.align 6
aes_decrypt:
add ip, r2, #32 @ 3rd round key
do_block dec_dround, dec_fround
ENDPROC(aes_decrypt)
.align 6
aes_encrypt_3x:
add ip, r2, #32 @ 3rd round key
do_block enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
.align 6
aes_decrypt_3x:
add ip, r2, #32 @ 3rd round key
do_block dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
.macro prepare_key, rk, rounds
add ip, \rk, \rounds, lsl #4
vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
vld1.8 {q14}, [ip] @ load last round key
.endm
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks)
* aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks)
*/
ENTRY(ce_aes_ecb_encrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbencloop3x:
subs r4, r4, #3
bmi .Lecbenc1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
bl aes_encrypt_3x
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
b .Lecbencloop3x
.Lecbenc1x:
adds r4, r4, #3
beq .Lecbencout
.Lecbencloop:
vld1.8 {q0}, [r1, :64]!
bl aes_encrypt
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
bne .Lecbencloop
.Lecbencout:
pop {r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
ENTRY(ce_aes_ecb_decrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbdecloop3x:
subs r4, r4, #3
bmi .Lecbdec1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
bl aes_decrypt_3x
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
b .Lecbdecloop3x
.Lecbdec1x:
adds r4, r4, #3
beq .Lecbdecout
.Lecbdecloop:
vld1.8 {q0}, [r1, :64]!
bl aes_decrypt
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
bne .Lecbdecloop
.Lecbdecout:
pop {r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
/*
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
*/
ENTRY(ce_aes_cbc_encrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
vld1.8 {q0}, [r5]
prepare_key r2, r3
.Lcbcencloop:
vld1.8 {q1}, [r1, :64]! @ get next pt block
veor q0, q0, q1 @ ..and xor with iv
bl aes_encrypt
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
bne .Lcbcencloop
vst1.8 {q0}, [r5]
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
ENTRY(ce_aes_cbc_decrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
vld1.8 {q6}, [r5] @ keep iv in q6
prepare_key r2, r3
.Lcbcdecloop3x:
subs r4, r4, #3
bmi .Lcbcdec1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
vmov q3, q0
vmov q4, q1
vmov q5, q2
bl aes_decrypt_3x
veor q0, q0, q6
veor q1, q1, q3
veor q2, q2, q4
vmov q6, q5
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
b .Lcbcdecloop3x
.Lcbcdec1x:
adds r4, r4, #3
beq .Lcbcdecout
vmov q15, q14 @ preserve last round key
.Lcbcdecloop:
vld1.8 {q0}, [r1, :64]! @ get next ct block
veor q14, q15, q6 @ combine prev ct with last key
vmov q6, q0
bl aes_decrypt
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
bne .Lcbcdecloop
.Lcbcdecout:
vst1.8 {q6}, [r5] @ keep iv in q6
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
ENTRY(ce_aes_ctr_encrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
vld1.8 {q6}, [r5] @ load ctr
prepare_key r2, r3
vmov r6, s27 @ keep swabbed ctr in r6
rev r6, r6
cmn r6, r4 @ 32 bit overflow?
bcs .Lctrloop
.Lctrloop3x:
subs r4, r4, #3
bmi .Lctr1x
add r6, r6, #1
vmov q0, q6
vmov q1, q6
rev ip, r6
add r6, r6, #1
vmov q2, q6
vmov s7, ip
rev ip, r6
add r6, r6, #1
vmov s11, ip
vld1.8 {q3-q4}, [r1, :64]!
vld1.8 {q5}, [r1, :64]!
bl aes_encrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
rev ip, r6
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
vmov s27, ip
b .Lctrloop3x
.Lctr1x:
adds r4, r4, #3
beq .Lctrout
.Lctrloop:
vmov q0, q6
bl aes_encrypt
subs r4, r4, #1
bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
vld1.8 {q3}, [r1, :64]!
veor q3, q0, q3
vst1.8 {q3}, [r0, :64]!
adds r6, r6, #1 @ increment BE ctr
rev ip, r6
vmov s27, ip
bcs .Lctrcarry
teq r4, #0
bne .Lctrloop
.Lctrout:
vst1.8 {q6}, [r5]
pop {r4-r6, pc}
.Lctrhalfblock:
vld1.8 {d1}, [r1, :64]
veor d0, d0, d1
vst1.8 {d0}, [r0, :64]
pop {r4-r6, pc}
.Lctrcarry:
.irp sreg, s26, s25, s24
vmov ip, \sreg @ load next word of ctr
rev ip, ip @ ... to handle the carry
adds ip, ip, #1
rev ip, ip
vmov \sreg, ip
bcc 0f
.endr
0: teq r4, #0
beq .Lctrout
b .Lctrloop
ENDPROC(ce_aes_ctr_encrypt)
/*
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 iv[], u8 const rk2[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 iv[], u8 const rk2[], int first)
*/
.macro next_tweak, out, in, const, tmp
vshr.s64 \tmp, \in, #63
vand \tmp, \tmp, \const
vadd.u64 \out, \in, \in
vext.8 \tmp, \tmp, \tmp, #8
veor \out, \out, \tmp
.endm
.align 3
.Lxts_mul_x:
.quad 1, 0x87
ce_aes_xts_init:
vldr d14, .Lxts_mul_x
vldr d15, .Lxts_mul_x + 8
ldrd r4, r5, [sp, #16] @ load args
ldr r6, [sp, #28]
vld1.8 {q0}, [r5] @ load iv
teq r6, #1 @ start of a block?
bxne lr
@ Encrypt the IV in q0 with the second AES key. This should only
@ be done at the start of a block.
ldr r6, [sp, #24] @ load AES key 2
prepare_key r6, r3
add ip, r6, #32 @ 3rd round key of key 2
b .Laes_encrypt_tweak @ tail call
ENDPROC(ce_aes_xts_init)
ENTRY(ce_aes_xts_encrypt)
push {r4-r6, lr}
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
vmov q3, q0
teq r6, #0 @ start of a block?
bne .Lxtsenc3x
.Lxtsencloop3x:
next_tweak q3, q3, q7, q6
.Lxtsenc3x:
subs r4, r4, #3
bmi .Lxtsenc1x
vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
vld1.8 {q2}, [r1, :64]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
veor q1, q1, q4
veor q2, q2, q5
bl aes_encrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
vst1.8 {q2}, [r0, :64]!
vmov q3, q5
teq r4, #0
beq .Lxtsencout
b .Lxtsencloop3x
.Lxtsenc1x:
adds r4, r4, #3
beq .Lxtsencout
.Lxtsencloop:
vld1.8 {q0}, [r1, :64]!
veor q0, q0, q3
bl aes_encrypt
veor q0, q0, q3
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
beq .Lxtsencout
next_tweak q3, q3, q7, q6
b .Lxtsencloop
.Lxtsencout:
vst1.8 {q3}, [r5]
pop {r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
ENTRY(ce_aes_xts_decrypt)
push {r4-r6, lr}
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
vmov q3, q0
teq r6, #0 @ start of a block?
bne .Lxtsdec3x
.Lxtsdecloop3x:
next_tweak q3, q3, q7, q6
.Lxtsdec3x:
subs r4, r4, #3
bmi .Lxtsdec1x
vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
vld1.8 {q2}, [r1, :64]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
veor q1, q1, q4
veor q2, q2, q5
bl aes_decrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
vst1.8 {q2}, [r0, :64]!
vmov q3, q5
teq r4, #0
beq .Lxtsdecout
b .Lxtsdecloop3x
.Lxtsdec1x:
adds r4, r4, #3
beq .Lxtsdecout
.Lxtsdecloop:
vld1.8 {q0}, [r1, :64]!
veor q0, q0, q3
add ip, r2, #32 @ 3rd round key
bl aes_decrypt
veor q0, q0, q3
vst1.8 {q0}, [r0, :64]!
subs r4, r4, #1
beq .Lxtsdecout
next_tweak q3, q3, q7, q6
b .Lxtsdecloop
.Lxtsdecout:
vst1.8 {q3}, [r5]
pop {r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
/*
* u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
* AES sbox substitution on each byte in
* 'input'
*/
ENTRY(ce_aes_sub)
vdup.32 q1, r0
veor q0, q0, q0
aese.8 q0, q1
vmov r0, s0
bx lr
ENDPROC(ce_aes_sub)
/*
* void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
* operation on round key *src
*/
ENTRY(ce_aes_invert)
vld1.8 {q0}, [r1]
aesimc.8 q0, q0
vst1.8 {q0}, [r0]
bx lr
ENDPROC(ce_aes_invert)


@@ -0,0 +1,524 @@
/*
* aes-ce-glue.c - wrapper code for ARMv8 AES
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <linux/module.h>
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
/* defined in aes-ce-core.S */
asmlinkage u32 ce_aes_sub(u32 input);
asmlinkage void ce_aes_invert(void *dst, void *src);
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 ctr[]);
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 iv[],
u8 const rk2[], int first);
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 iv[],
u8 const rk2[], int first);
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
* # of rounds specified by AES:
* 128 bit key 10 rounds
* 192 bit key 12 rounds
* 256 bit key 14 rounds
* => n byte key => 6 + (n/4) rounds
*/
return 6 + ctx->key_length / 4;
}
static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
/*
* The AES key schedule round constants
*/
static u8 const rcon[] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
};
u32 kwords = key_len / sizeof(u32);
struct aes_block *key_enc, *key_dec;
int i, j;
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256)
return -EINVAL;
memcpy(ctx->key_enc, in_key, key_len);
ctx->key_length = key_len;
kernel_neon_begin();
for (i = 0; i < sizeof(rcon); i++) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
rko[0] = rko[0] ^ rki[0] ^ rcon[i];
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
if (key_len == AES_KEYSIZE_192) {
if (i >= 7)
break;
rko[4] = rko[3] ^ rki[4];
rko[5] = rko[4] ^ rki[5];
} else if (key_len == AES_KEYSIZE_256) {
if (i >= 6)
break;
rko[4] = ce_aes_sub(rko[3]) ^ rki[4];
rko[5] = rko[4] ^ rki[5];
rko[6] = rko[5] ^ rki[6];
rko[7] = rko[6] ^ rki[7];
}
}
/*
* Generate the decryption keys for the Equivalent Inverse Cipher.
* This involves reversing the order of the round keys, and applying
* the Inverse Mix Columns transformation on all but the first and
* the last one.
*/
key_enc = (struct aes_block *)ctx->key_enc;
key_dec = (struct aes_block *)ctx->key_dec;
j = num_rounds(ctx);
key_dec[0] = key_enc[j];
for (i = 1, j--; j > 0; i++, j--)
ce_aes_invert(key_dec + i, key_enc + j);
key_dec[i] = key_enc[0];
kernel_neon_end();
return 0;
}
static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
ret = ce_aes_expandkey(ctx, in_key, key_len);
if (!ret)
return 0;
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
};
static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
if (!ret)
ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
key_len / 2);
if (!ret)
return 0;
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks,
walk.iv);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err, blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
nbytes -= blocks * AES_BLOCK_SIZE;
if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
break;
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
if (nbytes) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
/*
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
* to tell ce_aes_ctr_encrypt() to only read half a block.
*/
blocks = (nbytes <= 8) ? -1 : 1;
ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
num_rounds(ctx), blocks, walk.iv);
memcpy(tdst, tail, nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
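
The tail handling above exploits CTR being a stream mode: a final partial block is produced by encrypting one more counter block into an aligned scratch buffer and copying out only the bytes that remain. A self-contained restatement of the idea; aes_encrypt_block() is a hypothetical single-block primitive, not the kernel interface:

#include <stddef.h>
#include <string.h>

#define BLK 16

/* hypothetical single-block AES: out = AES_K(in) */
void aes_encrypt_block(unsigned char out[BLK], const unsigned char in[BLK]);

/* encrypt a trailing partial CTR block: ks = AES_K(counter), then XOR
 * only the nbytes that actually remain */
static void ctr_tail(unsigned char *dst, const unsigned char *src,
		     size_t nbytes, const unsigned char counter[BLK])
{
	unsigned char ks[BLK];
	size_t i;

	aes_encrypt_block(ks, counter);
	for (i = 0; i < nbytes; i++)
		dst[i] = src[i] ^ ks[i];
	memset(ks, 0, sizeof(ks));	/* wipe the keystream */
}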
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = num_rounds(&ctx->key1);
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_enc, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = num_rounds(&ctx->key1);
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_dec, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-ce",
.cra_driver_name = "__driver-ecb-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
}, {
.cra_name = "__cbc-aes-ce",
.cra_driver_name = "__driver-cbc-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
}, {
.cra_name = "__ctr-aes-ce",
.cra_driver_name = "__driver-ctr-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt,
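/* CTR is a stream mode: decryption reuses ctr_encrypt() unchanged */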
.decrypt = ctr_encrypt,
},
}, {
.cra_name = "__xts-aes-ce",
.cra_driver_name = "__driver-xts-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
}, {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
} };
static int __init aes_init(void)
{
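/* only register these algorithms if the CPU has the ARMv8-A AES instructions */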
if (!(elf_hwcap2 & HWCAP2_AES))
return -ENODEV;
return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
static void __exit aes_exit(void)
{
crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
module_init(aes_init);
module_exit(aes_exit);


@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__cbc-aes-neonbs",
.cra_driver_name = "__driver-cbc-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.cra_alignmask = 7,
@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__ctr-aes-neonbs",
.cra_driver_name = "__driver-ctr-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.cra_alignmask = 7,
@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__xts-aes-neonbs",
.cra_driver_name = "__driver-xts-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.cra_alignmask = 7,


@ -0,0 +1,94 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
SHASH .req q0
SHASH2 .req q1
T1 .req q2
T2 .req q3
MASK .req q4
XL .req q5
XM .req q6
XH .req q7
IN1 .req q7
SHASH_L .req d0
SHASH_H .req d1
SHASH2_L .req d2
T1_L .req d4
MASK_L .req d8
XL_L .req d10
XL_H .req d11
XM_L .req d12
XM_H .req d13
XH_L .req d14
.text
.fpu crypto-neon-fp-armv8
/*
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
vld1.64 {SHASH}, [r3]
vld1.64 {XL}, [r1]
vmov.i8 MASK, #0xe1
vext.8 SHASH2, SHASH, SHASH, #8
vshl.u64 MASK, MASK, #57
veor SHASH2, SHASH2, SHASH
/* do the head block first, if supplied */
ldr ip, [sp]
teq ip, #0
beq 0f
vld1.64 {T1}, [ip]
teq r0, #0
b 1f
0: vld1.64 {T1}, [r2]!
subs r0, r0, #1
1: /* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
#endif
vext.8 T2, XL, XL, #8
vext.8 IN1, T1, T1, #8
veor T1, T1, T2
veor XL, XL, IN1
vmull.p64 XH, SHASH_H, XL_H @ a1 * b1
veor T1, T1, XL
vmull.p64 XL, SHASH_L, XL_L @ a0 * b0
vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0)
vext.8 T1, XL, XH, #8
veor T2, XL, XH
veor XM, XM, T1
veor XM, XM, T2
vmull.p64 T2, XL_L, MASK_L
vmov XH_L, XM_H
vmov XM_H, XL_L
veor XL, XM, T2
vext.8 T2, XL, XL, #8
vmull.p64 XL, XL_L, MASK_L
veor T2, T2, XH
veor XL, XL, T2
bne 0b
vst1.64 {XL}, [r1]
bx lr
ENDPROC(pmull_ghash_update)
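
The three vmull.p64 instructions above are the classic Karatsuba reduction of a 128x128-bit carry-less multiply to three 64x64-bit ones, as the a1*b1, a0*b0 and (a1 + a0)(b1 + b0) annotations indicate. A C sketch of that decomposition, assuming a hypothetical clmul64() primitive; note that in GF(2) both addition and subtraction are XOR:

#include <stdint.h>

/* hypothetical 64x64 -> 128-bit carry-less multiply; res[0] is the low half */
void clmul64(uint64_t a, uint64_t b, uint64_t res[2]);

/* Karatsuba: one 128x128-bit carry-less multiply from three 64x64 ones */
static void clmul128(const uint64_t a[2], const uint64_t b[2], uint64_t r[4])
{
	uint64_t lo[2], hi[2], mid[2];

	clmul64(a[0], b[0], lo);			/* a0 * b0 */
	clmul64(a[1], b[1], hi);			/* a1 * b1 */
	clmul64(a[0] ^ a[1], b[0] ^ b[1], mid);		/* (a1+a0)(b1+b0) */
	mid[0] ^= lo[0] ^ hi[0];			/* remove a0b0, a1b1 */
	mid[1] ^= lo[1] ^ hi[1];

	r[0] = lo[0];
	r[1] = lo[1] ^ mid[0];
	r[2] = hi[0] ^ mid[1];
	r[3] = hi[1];
}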


@ -0,0 +1,320 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/cryptd.h>
#include <crypto/internal/hash.h>
#include <crypto/gf128mul.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
u64 a;
u64 b;
};
struct ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
u8 buf[GHASH_BLOCK_SIZE];
u32 count;
};
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k, const char *head);
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
ctx->count += len;
if ((partial + len) >= GHASH_BLOCK_SIZE) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
int blocks;
if (partial) {
int p = GHASH_BLOCK_SIZE - partial;
memcpy(ctx->buf + partial, src, p);
src += p;
len -= p;
}
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
kernel_neon_begin();
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(ctx->buf + partial, src, len);
return 0;
}
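
ghash_update() follows the standard streaming-hash buffering discipline: complete the partially filled block first, pass all full blocks to the block function in a single call, and stash whatever remains for the next update. A generic restatement of that control flow, where process_blocks() is a hypothetical callback standing in for pmull_ghash_update():

#include <stddef.h>
#include <string.h>

#define BLOCK 16

struct stream_ctx {
	unsigned char buf[BLOCK];
	unsigned long count;		/* total bytes fed in so far */
};

/* hypothetical block consumer; 'head' is an optional leading block */
void process_blocks(struct stream_ctx *ctx, const unsigned char *src,
		    size_t blocks, const unsigned char *head);

static void stream_update(struct stream_ctx *ctx,
			  const unsigned char *src, size_t len)
{
	size_t partial = ctx->count % BLOCK;

	ctx->count += len;
	if (partial + len >= BLOCK) {
		if (partial) {			/* top up the buffered block */
			size_t p = BLOCK - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}
		process_blocks(ctx, src, len / BLOCK,
			       partial ? ctx->buf : NULL);
		src += (len / BLOCK) * BLOCK;
		len %= BLOCK;
		partial = 0;
	}
	if (len)				/* keep the tail for later */
		memcpy(ctx->buf + partial, src, len);
}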
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
if (partial) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
kernel_neon_begin();
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
a = get_unaligned_be64(inkey + 8);
key->a = (a << 1) | (b >> 63);
key->b = (b << 1) | (a >> 63);
if (b >> 63)
key->b ^= 0xc200000000000000UL;
return 0;
}
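
The comment above is literal: the raw hash key H is multiplied by 'x' in GF(2^128) so the vmull.p64 code can use it directly, with any bit shifted off the top folded back in via the reduction constant 0xc2 << 56. A user-space restatement of exactly the transform ghash_setkey() performs; load_be64() is a local stand-in for get_unaligned_be64():

#include <stdint.h>

static uint64_t load_be64(const unsigned char *p)
{
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

/* same arithmetic as ghash_setkey(): H * x in GF(2^128) */
static void ghash_expand_key(const unsigned char inkey[16],
			     uint64_t *ka, uint64_t *kb)
{
	uint64_t b = load_be64(inkey);		/* first eight bytes  */
	uint64_t a = load_be64(inkey + 8);	/* second eight bytes */

	*ka = (a << 1) | (b >> 63);
	*kb = (b << 1) | (a >> 63);
	if (b >> 63)				/* fold the carry back */
		*kb ^= 0xc200000000000000ULL;
}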
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "__driver-ghash-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
},
};
static int ghash_async_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!may_use_simd()) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_init(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return crypto_shash_init(desc);
}
}
static int ghash_async_update(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
if (!may_use_simd()) {
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_update(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
return shash_ahash_update(req, desc);
}
}
static int ghash_async_final(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
if (!may_use_simd()) {
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_final(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
return crypto_shash_final(desc, req->result);
}
}
static int ghash_async_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!may_use_simd()) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_digest(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return shash_ahash_digest(req, desc);
}
}
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen)
{
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_ahash *child = &ctx->cryptd_tfm->base;
int err;
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
& CRYPTO_TFM_REQ_MASK);
err = crypto_ahash_setkey(child, key, keylen);
crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
& CRYPTO_TFM_RES_MASK);
return err;
}
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
{
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct ahash_request) +
crypto_ahash_reqsize(&cryptd_tfm->base));
return 0;
}
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
{
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_free_ahash(ctx->cryptd_tfm);
}
static struct ahash_alg ghash_async_alg = {
.init = ghash_async_init,
.update = ghash_async_update,
.final = ghash_async_final,
.setkey = ghash_async_setkey,
.digest = ghash_async_digest,
.halg.digestsize = GHASH_DIGEST_SIZE,
.halg.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_type = &crypto_ahash_type,
.cra_ctxsize = sizeof(struct ghash_async_ctx),
.cra_module = THIS_MODULE,
.cra_init = ghash_async_init_tfm,
.cra_exit = ghash_async_exit_tfm,
},
};
static int __init ghash_ce_mod_init(void)
{
int err;
if (!(elf_hwcap2 & HWCAP2_PMULL))
return -ENODEV;
err = crypto_register_shash(&ghash_alg);
if (err)
return err;
err = crypto_register_ahash(&ghash_async_alg);
if (err)
goto err_shash;
return 0;
err_shash:
crypto_unregister_shash(&ghash_alg);
return err;
}
static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
}
module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);


@ -0,0 +1,125 @@
/*
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.fpu crypto-neon-fp-armv8
k0 .req q0
k1 .req q1
k2 .req q2
k3 .req q3
ta0 .req q4
ta1 .req q5
tb0 .req q5
tb1 .req q4
dga .req q6
dgb .req q7
dgbs .req s28
dg0 .req q12
dg1a0 .req q13
dg1a1 .req q14
dg1b0 .req q14
dg1b1 .req q13
.macro add_only, op, ev, rc, s0, dg1
.ifnb \s0
vadd.u32 tb\ev, q\s0, \rc
.endif
sha1h.32 dg1b\ev, dg0
.ifb \dg1
sha1\op\().32 dg0, dg1a\ev, ta\ev
.else
sha1\op\().32 dg0, \dg1, ta\ev
.endif
.endm
.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
sha1su0.32 q\s0, q\s1, q\s2
add_only \op, \ev, \rc, \s1, \dg1
sha1su1.32 q\s0, q\s3
.endm
.align 6
.Lsha1_rcon:
.word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
.word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
.word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
.word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
/*
* void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
* int blocks);
*/
ENTRY(sha1_ce_transform)
/* load round constants */
adr ip, .Lsha1_rcon
vld1.32 {k0-k1}, [ip, :128]!
vld1.32 {k2-k3}, [ip, :128]
/* load state */
vld1.32 {dga}, [r0]
vldr dgbs, [r0, #16]
/* load input */
0: vld1.32 {q8-q9}, [r1]!
vld1.32 {q10-q11}, [r1]!
subs r2, r2, #1
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev32.8 q8, q8
vrev32.8 q9, q9
vrev32.8 q10, q10
vrev32.8 q11, q11
#endif
vadd.u32 ta0, q8, k0
vmov dg0, dga
add_update c, 0, k0, 8, 9, 10, 11, dgb
add_update c, 1, k0, 9, 10, 11, 8
add_update c, 0, k0, 10, 11, 8, 9
add_update c, 1, k0, 11, 8, 9, 10
add_update c, 0, k1, 8, 9, 10, 11
add_update p, 1, k1, 9, 10, 11, 8
add_update p, 0, k1, 10, 11, 8, 9
add_update p, 1, k1, 11, 8, 9, 10
add_update p, 0, k1, 8, 9, 10, 11
add_update p, 1, k2, 9, 10, 11, 8
add_update m, 0, k2, 10, 11, 8, 9
add_update m, 1, k2, 11, 8, 9, 10
add_update m, 0, k2, 8, 9, 10, 11
add_update m, 1, k2, 9, 10, 11, 8
add_update m, 0, k3, 10, 11, 8, 9
add_update p, 1, k3, 11, 8, 9, 10
add_only p, 0, k3, 9
add_only p, 1, k3, 10
add_only p, 0, k3, 11
add_only p, 1
/* update state */
vadd.u32 dga, dga, dg0
vadd.u32 dgb, dgb, dg1a0
bne 0b
/* store new state */
vst1.32 {dga}, [r0]
vstr dgbs, [r0, #16]
bx lr
ENDPROC(sha1_ce_transform)


@ -0,0 +1,96 @@
/*
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include "sha1.h"
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
int blocks);
static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
if (!may_use_simd() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
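/* NEON unusable, or too little data to complete a block: take the scalar path */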
return sha1_update_arm(desc, data, len);
kernel_neon_begin();
sha1_base_do_update(desc, data, len, sha1_ce_transform);
kernel_neon_end();
return 0;
}
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!may_use_simd())
return sha1_finup_arm(desc, data, len, out);
kernel_neon_begin();
if (len)
sha1_base_do_update(desc, data, len, sha1_ce_transform);
sha1_base_do_finalize(desc, sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
return sha1_ce_finup(desc, NULL, 0, out);
}
static struct shash_alg alg = {
.init = sha1_base_init,
.update = sha1_ce_update,
.final = sha1_ce_final,
.finup = sha1_ce_finup,
.descsize = sizeof(struct sha1_state),
.digestsize = SHA1_DIGEST_SIZE,
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init sha1_ce_mod_init(void)
{
if (!(elf_hwcap2 & HWCAP2_SHA1))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit sha1_ce_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha1_ce_mod_init);
module_exit(sha1_ce_mod_fini);


@ -7,4 +7,7 @@
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out);
#endif


@ -22,127 +22,47 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <asm/byteorder.h>
#include <asm/crypto/sha1.h>
#include "sha1.h"
asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
static int sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_update(struct sha1_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_block_data_order(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* make sure casting to sha1_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
return 0;
}
res = __sha1_update(sctx, data, len, partial);
return res;
return sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_block_data_order);
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
/* We need to fill a whole block for __sha1_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_update(sctx, padding, padlen, index);
}
__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
return sha1_base_finish(desc, out);
}
static int sha1_export(struct shash_desc *desc, void *out)
int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_block_data_order);
return sha1_final(desc, out);
}
static int sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
EXPORT_SYMBOL_GPL(sha1_finup_arm);
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
.init = sha1_base_init,
.update = sha1_update_arm,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
.finup = sha1_finup_arm,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",


@ -25,147 +25,60 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <crypto/sha1_base.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/crypto/sha1.h>
#include "sha1.h"
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
static int sha1_neon_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_transform_neon(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_transform_neon(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
if (!may_use_simd() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return sha1_update_arm(desc, data, len);
return 0;
}
kernel_neon_begin();
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_transform_neon);
kernel_neon_end();
if (!may_use_simd()) {
res = sha1_update_arm(desc, data, len);
} else {
kernel_neon_begin();
res = __sha1_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
return 0;
}
static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!may_use_simd())
return sha1_finup_arm(desc, data, len, out);
kernel_neon_begin();
if (len)
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_transform_neon);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
/* Add padding and return the message digest. */
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
if (!may_use_simd()) {
sha1_update_arm(desc, padding, padlen);
sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha1_neon_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_neon_update(desc, padding, padlen, index);
}
__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha1_neon_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_neon_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
return sha1_neon_finup(desc, NULL, 0, out);
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_neon_init,
.init = sha1_base_init,
.update = sha1_neon_update,
.final = sha1_neon_final,
.export = sha1_neon_export,
.import = sha1_neon_import,
.finup = sha1_neon_finup,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",


@ -0,0 +1,125 @@
/*
* sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.fpu crypto-neon-fp-armv8
k0 .req q7
k1 .req q8
rk .req r3
ta0 .req q9
ta1 .req q10
tb0 .req q10
tb1 .req q9
dga .req q11
dgb .req q12
dg0 .req q13
dg1 .req q14
dg2 .req q15
.macro add_only, ev, s0
vmov dg2, dg0
.ifnb \s0
vld1.32 {k\ev}, [rk, :128]!
.endif
sha256h.32 dg0, dg1, tb\ev
sha256h2.32 dg1, dg2, tb\ev
.ifnb \s0
vadd.u32 ta\ev, q\s0, k\ev
.endif
.endm
.macro add_update, ev, s0, s1, s2, s3
sha256su0.32 q\s0, q\s1
add_only \ev, \s1
sha256su1.32 q\s0, q\s2, q\s3
.endm
.align 6
.Lsha256_rcon:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
* void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
* int blocks);
*/
ENTRY(sha2_ce_transform)
/* load state */
vld1.32 {dga-dgb}, [r0]
/* load input */
0: vld1.32 {q0-q1}, [r1]!
vld1.32 {q2-q3}, [r1]!
subs r2, r2, #1
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev32.8 q0, q0
vrev32.8 q1, q1
vrev32.8 q2, q2
vrev32.8 q3, q3
#endif
/* load first round constant */
adr rk, .Lsha256_rcon
vld1.32 {k0}, [rk, :128]!
vadd.u32 ta0, q0, k0
vmov dg0, dga
vmov dg1, dgb
add_update 1, 0, 1, 2, 3
add_update 0, 1, 2, 3, 0
add_update 1, 2, 3, 0, 1
add_update 0, 3, 0, 1, 2
add_update 1, 0, 1, 2, 3
add_update 0, 1, 2, 3, 0
add_update 1, 2, 3, 0, 1
add_update 0, 3, 0, 1, 2
add_update 1, 0, 1, 2, 3
add_update 0, 1, 2, 3, 0
add_update 1, 2, 3, 0, 1
add_update 0, 3, 0, 1, 2
add_only 1, 1
add_only 0, 2
add_only 1, 3
add_only 0
/* update state */
vadd.u32 dga, dga, dg0
vadd.u32 dgb, dgb, dg1
bne 0b
/* store new state */
vst1.32 {dga-dgb}, [r0]
bx lr
ENDPROC(sha2_ce_transform)


@ -0,0 +1,114 @@
/*
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include <asm/unaligned.h>
#include "sha256_glue.h"
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
int blocks);
static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
if (!may_use_simd() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_arm_update(desc, data, len);
kernel_neon_begin();
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
return 0;
}
static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!may_use_simd())
return crypto_sha256_arm_finup(desc, data, len, out);
kernel_neon_begin();
if (len)
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
return sha256_base_finish(desc, out);
}
static int sha2_ce_final(struct shash_desc *desc, u8 *out)
{
return sha2_ce_finup(desc, NULL, 0, out);
}
static struct shash_alg algs[] = { {
.init = sha224_base_init,
.update = sha2_ce_update,
.final = sha2_ce_final,
.finup = sha2_ce_finup,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha256_base_init,
.update = sha2_ce_update,
.final = sha2_ce_final,
.finup = sha2_ce_finup,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha2_ce_mod_init(void)
{
if (!(elf_hwcap2 & HWCAP2_SHA2))
return -ENODEV;
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha2_ce_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);


@ -0,0 +1,716 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================
# SHA256 block procedure for ARMv4. May 2007.
# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].
# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.
# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
# September 2013.
#
# Add NEON implementation. On Cortex A8 it was measured to process one
# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
# code (meaning that the latter performs sub-optimally; nothing was done
# about it).
# May 2014.
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$ctx="r0"; $t0="r0";
$inp="r1"; $t4="r1";
$len="r2"; $t1="r2";
$T1="r3"; $t3="r3";
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
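
The four tuples above are the rotate/shift amounts of the SHA-256 round functions from FIPS 180-4, which the perlasm below open-codes. In plain C (for reference only) they read:

#include <stdint.h>

static uint32_t ror32(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));	/* n is always 1..31 here */
}

/* FIPS 180-4 SHA-256 round functions, matching the tuples above */
static uint32_t Sigma0(uint32_t a) { return ror32(a,  2) ^ ror32(a, 13) ^ ror32(a, 22); }
static uint32_t Sigma1(uint32_t e) { return ror32(e,  6) ^ ror32(e, 11) ^ ror32(e, 25); }
static uint32_t sigma0(uint32_t x) { return ror32(x,  7) ^ ror32(x, 18) ^ (x >> 3); }
static uint32_t sigma1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }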
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
@ ldr $t1,[$inp],#4 @ $i
# if $i==15
str $inp,[sp,#17*4] @ make room for $t4
# endif
eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
# ifndef __ARMEB__
rev $t1,$t1
# endif
#else
@ ldrb $t1,[$inp,#3] @ $i
add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
ldrb $t2,[$inp,#2]
ldrb $t0,[$inp,#1]
orr $t1,$t1,$t2,lsl#8
ldrb $t2,[$inp],#4
orr $t1,$t1,$t0,lsl#16
# if $i==15
str $inp,[sp,#17*4] @ make room for $t4
# endif
eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
orr $t1,$t1,$t2,lsl#24
eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
#endif
___
$code.=<<___;
ldr $t2,[$Ktbl],#4 @ *K256++
add $h,$h,$t1 @ h+=X[i]
str $t1,[sp,#`$i%16`*4]
eor $t1,$f,$g
add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
and $t1,$t1,$e
add $h,$h,$t2 @ h+=K256[i]
eor $t1,$t1,$g @ Ch(e,f,g)
eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
add $h,$h,$t1 @ h+=Ch(e,f,g)
#if $i==31
and $t2,$t2,#0xff
cmp $t2,#0xf2 @ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
ldr $t1,[$inp],#4 @ prefetch
# else
ldrb $t1,[$inp,#3]
# endif
eor $t2,$a,$b @ a^b, b^c in next round
#else
ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
eor $t2,$a,$b @ a^b, b^c in next round
ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
#endif
eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
and $t3,$t3,$t2 @ (b^c)&=(a^b)
add $d,$d,$h @ d+=h
eor $t3,$t3,$b @ Maj(a,b,c)
add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
@ add $h,$h,$t3 @ h+=Maj(a,b,c)
___
($t2,$t3)=($t3,$t2);
}
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
$code.=<<___;
@ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
@ ldr $t4,[sp,#`($i+14)%16`*4]
mov $t0,$t1,ror#$sigma0[0]
add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
mov $t2,$t4,ror#$sigma1[0]
eor $t0,$t0,$t1,ror#$sigma0[1]
eor $t2,$t2,$t4,ror#$sigma1[1]
eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
ldr $t1,[sp,#`($i+0)%16`*4]
eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
ldr $t4,[sp,#`($i+9)%16`*4]
add $t2,$t2,$t0
eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
add $t1,$t1,$t2
eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
add $t1,$t1,$t4 @ X[i]
___
&BODY_00_15(@_);
}
$code=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
.text
#if __ARM_ARCH__<7
.code 32
#else
.syntax unified
# ifdef __thumb2__
# define adrl adr
.thumb
# else
.code 32
# endif
#endif
.type K256,%object
.align 5
K256:
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order
#endif
.align 5
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
#if __ARM_ARCH__<7
sub r3,pc,#8 @ sha256_block_data_order
#else
adr r3,sha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256
bne .LARMv8
tst r12,#ARMV7_NEON
bne .LNEON
#endif
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
sub $Ktbl,r3,#256+32 @ K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
ldr $t1,[$inp],#4
# else
ldrb $t1,[$inp,#3]
# endif
eor $t3,$B,$C @ magic
eor $t2,$t2,$t2
___
for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
#if __ARM_ARCH__>=7
ite eq @ Thumb2 thing, sanity check in ARM
#endif
ldreq $t3,[sp,#16*4] @ pull ctx
bne .Lrounds_16_xx
add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
ldr $t0,[$t3,#0]
ldr $t1,[$t3,#4]
ldr $t2,[$t3,#8]
add $A,$A,$t0
ldr $t0,[$t3,#12]
add $B,$B,$t1
ldr $t1,[$t3,#16]
add $C,$C,$t2
ldr $t2,[$t3,#20]
add $D,$D,$t0
ldr $t0,[$t3,#24]
add $E,$E,$t1
ldr $t1,[$t3,#28]
add $F,$F,$t2
ldr $inp,[sp,#17*4] @ pull inp
ldr $t2,[sp,#18*4] @ pull inp+len
add $G,$G,$t0
add $H,$H,$t1
stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
cmp $inp,$t2
sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
bne .Loop
add sp,sp,#`16+3`*4 @ destroy frame
#if __ARM_ARCH__>=5
ldmia sp!,{r4-r11,pc}
#else
ldmia sp!,{r4-r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
.size sha256_block_data_order,.-sha256_block_data_order
___
######################################################################
# NEON stuff
#
{{{
my @X=map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
my $Xfer=$t4;
my $j=0;
sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
sub AUTOLOAD() # thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
my $arg = pop;
$arg = "#$arg" if ($arg*1 eq $arg);
$code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
sub Xupdate()
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body);
my ($a,$b,$c,$d,$e,$f,$g,$h);
&vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T2,$T0,$sigma0[0]);
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T1,$T0,$sigma0[2]);
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T2,$T0,32-$sigma0[0]);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T3,$T0,$sigma0[1]);
eval(shift(@insns));
eval(shift(@insns));
&veor ($T1,$T1,$T2);
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T3,$T0,32-$sigma0[1]);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&veor ($T1,$T1,$T3); # sigma0(X[1..4])
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
eval(shift(@insns));
eval(shift(@insns));
&veor ($T5,$T5,$T4);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&veor ($T5,$T5,$T4); # sigma1(X[14..15])
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
eval(shift(@insns));
eval(shift(@insns));
&veor ($T5,$T5,$T4);
eval(shift(@insns));
eval(shift(@insns));
&vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&vld1_32 ("{$T0}","[$Ktbl,:128]!");
eval(shift(@insns));
eval(shift(@insns));
&vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&veor ($T5,$T5,$T4); # sigma1(X[16..17])
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 ($T0,$T0,@X[0]);
while($#insns>=2) { eval(shift(@insns)); }
&vst1_32 ("{$T0}","[$Xfer,:128]!");
eval(shift(@insns));
eval(shift(@insns));
push(@X,shift(@X)); # "rotate" X[]
}
sub Xpreload()
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body);
my ($a,$b,$c,$d,$e,$f,$g,$h);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&vld1_32 ("{$T0}","[$Ktbl,:128]!");
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&vrev32_8 (@X[0],@X[0]);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&vadd_i32 ($T0,$T0,@X[0]);
foreach (@insns) { eval; } # remaining instructions
&vst1_32 ("{$T0}","[$Xfer,:128]!");
push(@X,shift(@X)); # "rotate" X[]
}
sub body_00_15 () {
(
'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
'&add ($h,$h,$t1)', # h+=X[i]+K[i]
'&eor ($t1,$f,$g)',
'&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
'&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
'&and ($t1,$t1,$e)',
'&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
'&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
'&eor ($t1,$t1,$g)', # Ch(e,f,g)
'&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
'&eor ($t2,$a,$b)', # a^b, b^c in next round
'&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
'&add ($h,$h,$t1)', # h+=Ch(e,f,g)
'&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
'&ldr ($t1,"[$Ktbl]") if ($j==15);'.
'&ldr ($t1,"[sp,#64]") if ($j==31)',
'&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
'&add ($d,$d,$h)', # d+=h
'&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
'&eor ($t3,$t3,$b)', # Maj(a,b,c)
'$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
)
}
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.global sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
.align 4
sha256_block_data_order_neon:
.LNEON:
stmdb sp!,{r4-r12,lr}
sub $H,sp,#16*4+16
adrl $Ktbl,K256
bic $H,$H,#15 @ align for 128-bit stores
mov $t2,sp
mov sp,$H @ alloca
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
vld1.8 {@X[0]},[$inp]!
vld1.8 {@X[1]},[$inp]!
vld1.8 {@X[2]},[$inp]!
vld1.8 {@X[3]},[$inp]!
vld1.32 {$T0},[$Ktbl,:128]!
vld1.32 {$T1},[$Ktbl,:128]!
vld1.32 {$T2},[$Ktbl,:128]!
vld1.32 {$T3},[$Ktbl,:128]!
vrev32.8 @X[0],@X[0] @ yes, even on
str $ctx,[sp,#64]
vrev32.8 @X[1],@X[1] @ big-endian
str $inp,[sp,#68]
mov $Xfer,sp
vrev32.8 @X[2],@X[2]
str $len,[sp,#72]
vrev32.8 @X[3],@X[3]
str $t2,[sp,#76] @ save original sp
vadd.i32 $T0,$T0,@X[0]
vadd.i32 $T1,$T1,@X[1]
vst1.32 {$T0},[$Xfer,:128]!
vadd.i32 $T2,$T2,@X[2]
vst1.32 {$T1},[$Xfer,:128]!
vadd.i32 $T3,$T3,@X[3]
vst1.32 {$T2},[$Xfer,:128]!
vst1.32 {$T3},[$Xfer,:128]!
ldmia $ctx,{$A-$H}
sub $Xfer,$Xfer,#64
ldr $t1,[sp,#0]
eor $t2,$t2,$t2
eor $t3,$B,$C
b .L_00_48
.align 4
.L_00_48:
___
&Xupdate(\&body_00_15);
&Xupdate(\&body_00_15);
&Xupdate(\&body_00_15);
&Xupdate(\&body_00_15);
$code.=<<___;
teq $t1,#0 @ check for K256 terminator
ldr $t1,[sp,#0]
sub $Xfer,$Xfer,#64
bne .L_00_48
ldr $inp,[sp,#68]
ldr $t0,[sp,#72]
sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
teq $inp,$t0
it eq
subeq $inp,$inp,#64 @ avoid SEGV
vld1.8 {@X[0]},[$inp]! @ load next input block
vld1.8 {@X[1]},[$inp]!
vld1.8 {@X[2]},[$inp]!
vld1.8 {@X[3]},[$inp]!
it ne
strne $inp,[sp,#68]
mov $Xfer,sp
___
&Xpreload(\&body_00_15);
&Xpreload(\&body_00_15);
&Xpreload(\&body_00_15);
&Xpreload(\&body_00_15);
$code.=<<___;
ldr $t0,[$t1,#0]
add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
ldr $t2,[$t1,#4]
ldr $t3,[$t1,#8]
ldr $t4,[$t1,#12]
add $A,$A,$t0 @ accumulate
ldr $t0,[$t1,#16]
add $B,$B,$t2
ldr $t2,[$t1,#20]
add $C,$C,$t3
ldr $t3,[$t1,#24]
add $D,$D,$t4
ldr $t4,[$t1,#28]
add $E,$E,$t0
str $A,[$t1],#4
add $F,$F,$t2
str $B,[$t1],#4
add $G,$G,$t3
str $C,[$t1],#4
add $H,$H,$t4
str $D,[$t1],#4
stmia $t1,{$E-$H}
ittte ne
movne $Xfer,sp
ldrne $t1,[sp,#0]
eorne $t2,$t2,$t2
ldreq sp,[sp,#76] @ restore original sp
itt ne
eorne $t3,$B,$C
bne .L_00_48
ldmia sp!,{r4-r12,pc}
.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
}}}
######################################################################
# ARMv8 stuff
#
{{{
my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
my @MSG=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";
$code.=<<___;
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
# ifdef __thumb2__
# define INST(a,b,c,d) .byte c,d|0xc,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d
# endif
.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
.LARMv8:
vld1.32 {$ABCD,$EFGH},[$ctx]
# ifdef __thumb2__
adr $Ktbl,.LARMv8
sub $Ktbl,$Ktbl,#.LARMv8-K256
# else
adrl $Ktbl,K256
# endif
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
.Loop_v8:
vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
vld1.32 {$W0},[$Ktbl]!
vrev32.8 @MSG[0],@MSG[0]
vrev32.8 @MSG[1],@MSG[1]
vrev32.8 @MSG[2],@MSG[2]
vrev32.8 @MSG[3],@MSG[3]
vmov $ABCD_SAVE,$ABCD @ offload
vmov $EFGH_SAVE,$EFGH
teq $inp,$len
___
for($i=0;$i<12;$i++) {
$code.=<<___;
vld1.32 {$W1},[$Ktbl]!
vadd.i32 $W0,$W0,@MSG[0]
sha256su0 @MSG[0],@MSG[1]
vmov $abcd,$ABCD
sha256h $ABCD,$EFGH,$W0
sha256h2 $EFGH,$abcd,$W0
sha256su1 @MSG[0],@MSG[2],@MSG[3]
___
($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
}
$code.=<<___;
vld1.32 {$W1},[$Ktbl]!
vadd.i32 $W0,$W0,@MSG[0]
vmov $abcd,$ABCD
sha256h $ABCD,$EFGH,$W0
sha256h2 $EFGH,$abcd,$W0
vld1.32 {$W0},[$Ktbl]!
vadd.i32 $W1,$W1,@MSG[1]
vmov $abcd,$ABCD
sha256h $ABCD,$EFGH,$W1
sha256h2 $EFGH,$abcd,$W1
vld1.32 {$W1},[$Ktbl]
vadd.i32 $W0,$W0,@MSG[2]
sub $Ktbl,$Ktbl,#256-16 @ rewind
vmov $abcd,$ABCD
sha256h $ABCD,$EFGH,$W0
sha256h2 $EFGH,$abcd,$W0
vadd.i32 $W1,$W1,@MSG[3]
vmov $abcd,$ABCD
sha256h $ABCD,$EFGH,$W1
sha256h2 $EFGH,$abcd,$W1
vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
it ne
bne .Loop_v8
vst1.32 {$ABCD,$EFGH},[$ctx]
ret @ bx lr
.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm OPENSSL_armcap_P,4,4
#endif
___
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
last if (!s/^#/@/ and !/^$/);
print;
}
close SELF;
{ my %opcode = (
"sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
"sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
sub unsha256 {
my ($mnemonic,$arg)=@_;
if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
|(($2&7)<<17)|(($2&8)<<4)
|(($3&7)<<1) |(($3&8)<<2);
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
}
}
}
foreach (split($/,$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";
}
close STDOUT; # enforce flush

File diff suppressed because it is too large.


@ -0,0 +1,128 @@
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using optimized ARM assembler and NEON instructions.
*
* Copyright © 2015 Google Inc.
*
* This file is based on sha256_ssse3_glue.c:
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
unsigned int num_blks);
int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
/* make sure casting to sha256_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
return sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
}
EXPORT_SYMBOL(crypto_sha256_arm_update);
static int sha256_final(struct shash_desc *desc, u8 *out)
{
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha256_block_data_order);
return sha256_base_finish(desc, out);
}
int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
return sha256_final(desc, out);
}
EXPORT_SYMBOL(crypto_sha256_arm_finup);
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha256_mod_init(void)
{
int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
if (res < 0)
return res;
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
res = crypto_register_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
if (res < 0)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
return res;
}
static void __exit sha256_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
crypto_unregister_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
}
module_init(sha256_mod_init);
module_exit(sha256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
MODULE_ALIAS_CRYPTO("sha256");
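
For reference, a kernel caller reaches this driver through the generic shash
API; the crypto core transparently picks the registered "sha256" with the
highest cra_priority. A minimal, hedged sketch of such a caller (4.1-era API,
sha256_digest_example is a made-up name for illustration):

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

static int sha256_digest_example(const u8 *data, unsigned int len,
				 u8 out[SHA256_DIGEST_SIZE])
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
	int err;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		/* descriptor on the stack, sized for the chosen driver */
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}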


@ -0,0 +1,14 @@
#ifndef _CRYPTO_SHA256_GLUE_H
#define _CRYPTO_SHA256_GLUE_H
#include <linux/crypto.h>
extern struct shash_alg sha256_neon_algs[2];
int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len);
int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash);
#endif /* _CRYPTO_SHA256_GLUE_H */


@ -0,0 +1,101 @@
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using NEON instructions.
*
* Copyright © 2015 Google Inc.
*
* This file is based on sha512_neon_glue.c:
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
unsigned int num_blks);
static int sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
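/*
 * Fall back to the scalar ARM asm implementation when the NEON unit is
 * unavailable in this context or when the update cannot complete even a
 * single block (buffer-only updates gain nothing from NEON).
 */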
if (!may_use_simd() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_arm_update(desc, data, len);
kernel_neon_begin();
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order_neon);
kernel_neon_end();
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!may_use_simd())
return crypto_sha256_arm_finup(desc, data, len, out);
kernel_neon_begin();
if (len)
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order_neon);
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha256_block_data_order_neon);
kernel_neon_end();
return sha256_base_finish(desc, out);
}
static int sha256_final(struct shash_desc *desc, u8 *out)
{
return sha256_finup(desc, NULL, 0, out);
}
struct shash_alg sha256_neon_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
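
Since these NEON variants register with cra_priority 250 against 150 for the
plain asm versions, a generic "sha256" allocation resolves to sha256-neon on
NEON-capable parts. A hedged way to check which driver actually got selected:

	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);

	if (!IS_ERR(tfm)) {
		pr_info("sha256 provider: %s\n",
			crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));
		crypto_free_shash(tfm);
	}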


@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-" MODE,
.cra_driver_name = "__driver-ecb-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__cbc-aes-" MODE,
.cra_driver_name = "__driver-cbc-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ctr-aes-" MODE,
.cra_driver_name = "__driver-ctr-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__xts-aes-" MODE,
.cra_driver_name = "__driver-xts-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
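
The CRYPTO_ALG_INTERNAL flag added in this hunk hides these helper algorithms
from generic lookups (and therefore from user space): unless a caller sets the
flag itself, the core masks internal implementations out. A hedged sketch of
the two lookup styles (the "__driver-ecb-aes-ce" name is one instance of the
MODE pattern above):

	/* normal lookup: internal helpers are now filtered out */
	struct crypto_blkcipher *tfm =
		crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_INTERNAL);

	/* explicit request for the internal helper, as the async wrappers do */
	struct crypto_blkcipher *helper =
		crypto_alloc_blkcipher("__driver-ecb-aes-ce",
				       CRYPTO_ALG_INTERNAL,
				       CRYPTO_ALG_INTERNAL);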


@ -66,8 +66,8 @@
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
/*
* void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
* void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
* int blocks)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
ld1r {k3.4s}, [x6]
/* load state */
ldr dga, [x2]
ldr dgb, [x2, #16]
ldr dga, [x0]
ldr dgb, [x0, #16]
/* load partial state (if supplied) */
cbz x3, 0f
ld1 {v8.4s-v11.4s}, [x3]
b 1f
/* load sha1_ce_state::finalize */
ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w0, w0, #1
sub w2, w2, #1
1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
2: add t0.4s, v8.4s, k0.4s
1: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
cbnz w0, 0b
cbnz w2, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
* Skip if the input size was not a round multiple of the block size;
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
ldr x4, [x0, #:lo12:sha1_ce_offsetof_count]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
b 2b
b 1b
/* store new state */
3: str dga, [x2]
str dgb, [x2, #16]
3: str dga, [x0]
str dgb, [x0, #16]
ret
ENDPROC(sha1_ce_transform)


@ -12,144 +12,81 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#define ASM_EXPORT(sym, val) \
asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
struct sha1_ce_state {
struct sha1_state sst;
u32 finalize;
};
static int sha1_init(struct shash_desc *desc)
asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
int blocks);
static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA1_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA1_BLOCK_SIZE;
len %= SHA1_BLOCK_SIZE;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state,
partial ? sctx->buffer : NULL, 0);
kernel_neon_end();
data += blocks * SHA1_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buffer + partial, data, len);
return 0;
}
static int sha1_final(struct shash_desc *desc, u8 *out)
{
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
struct sha1_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
__be32 *dst = (__be32 *)out;
int i;
u32 padlen = SHA1_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
sha1_update(desc, padding, padlen);
sha1_update(desc, (const u8 *)&bits, sizeof(bits));
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int blocks;
int i;
if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
sha1_update(desc, data, len);
return sha1_final(desc, out);
}
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha1_ce_transform()
*/
blocks = len / SHA1_BLOCK_SIZE;
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
sctx->finalize = 0;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state, NULL, len);
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_export(struct shash_desc *desc, void *out)
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state *dst = out;
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
*dst = *sctx;
return 0;
ASM_EXPORT(sha1_ce_offsetof_count,
offsetof(struct sha1_ce_state, sst.count));
ASM_EXPORT(sha1_ce_offsetof_finalize,
offsetof(struct sha1_ce_state, finalize));
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
*/
sctx->finalize = finalize;
kernel_neon_begin_partial(16);
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_ce_transform);
if (!finalize)
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
static int sha1_import(struct shash_desc *desc, const void *in)
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state const *src = in;
*sctx = *src;
return 0;
kernel_neon_begin_partial(16);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out);
}
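
The heavy lifting removed from this file now lives in the new sha1 base
layer. A sketch of the finalization it performs, paraphrased from
<crypto/sha1_base.h> as introduced by this merge (the header is
authoritative):

static inline int sha1_base_do_finalize(struct shash_desc *desc,
					sha1_block_fn *block_fn)
{
	const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
	struct sha1_state *sctx = shash_desc_ctx(desc);
	__be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

	/* append the 0x80 terminator */
	sctx->buffer[partial++] = 0x80;
	if (partial > bit_offset) {
		/* no room left for the length: pad and flush this block */
		memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
		partial = 0;
		block_fn(sctx, sctx->buffer, 1);
	}

	/* zero-fill up to the length field, then append the bit count */
	memset(sctx->buffer + partial, 0x0, bit_offset - partial);
	*bits = cpu_to_be64(sctx->count << 3);
	block_fn(sctx, sctx->buffer, 1);

	return 0;
}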
static struct shash_alg alg = {
.init = sha1_init,
.update = sha1_update,
.final = sha1_final,
.finup = sha1_finup,
.export = sha1_export,
.import = sha1_import,
.descsize = sizeof(struct sha1_state),
.init = sha1_base_init,
.update = sha1_ce_update,
.final = sha1_ce_final,
.finup = sha1_ce_finup,
.descsize = sizeof(struct sha1_ce_state),
.digestsize = SHA1_DIGEST_SIZE,
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",


@ -73,8 +73,8 @@
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
* void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
* void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
* int blocks)
*/
ENTRY(sha2_ce_transform)
/* load round constants */
@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
ldp dga, dgb, [x2]
ldp dga, dgb, [x0]
/* load partial input (if supplied) */
cbz x3, 0f
ld1 {v16.4s-v19.4s}, [x3]
b 1f
/* load sha256_ce_state::finalize */
ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
sub w0, w0, #1
sub w2, w2, #1
1:
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
2: add t0.4s, v16.4s, v0.4s
1: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
cbnz w0, 0b
cbnz w2, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
* Skip if the input size was not a round multiple of the block size;
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b )
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
b 2b
b 1b
/* store new state */
3: stp dga, dgb, [x2]
3: stp dga, dgb, [x0]
ret
ENDPROC(sha2_ce_transform)


@ -12,206 +12,82 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#define ASM_EXPORT(sym, val) \
asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
struct sha256_ce_state {
struct sha256_state sst;
u32 finalize;
};
static int sha224_init(struct shash_desc *desc)
asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
int blocks);
static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
sctx->finalize = 0;
kernel_neon_begin_partial(28);
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
*sctx = (struct sha256_state){
.state = {
SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
}
};
return 0;
}
static int sha256_init(struct shash_desc *desc)
static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
*sctx = (struct sha256_state){
.state = {
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
}
};
return 0;
}
static int sha2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA256_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA256_BLOCK_SIZE;
len %= SHA256_BLOCK_SIZE;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state,
partial ? sctx->buf : NULL, 0);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buf + partial, data, len);
return 0;
}
static void sha2_final(struct shash_desc *desc)
{
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
struct sha256_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
u32 padlen = SHA256_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
sha2_update(desc, padding, padlen);
sha2_update(desc, (const u8 *)&bits, sizeof(bits));
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static void sha2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
int blocks;
if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
sha2_update(desc, data, len);
sha2_final(desc);
return;
}
ASM_EXPORT(sha256_ce_offsetof_count,
offsetof(struct sha256_ce_state, sst.count));
ASM_EXPORT(sha256_ce_offsetof_finalize,
offsetof(struct sha256_ce_state, finalize));
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha2_ce_transform()
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
*/
blocks = len / SHA256_BLOCK_SIZE;
sctx->finalize = finalize;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state, NULL, len);
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
if (!finalize)
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
return sha256_base_finish(desc, out);
}
static int sha224_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha2_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha2_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state const *src = in;
*sctx = *src;
return 0;
kernel_neon_begin_partial(28);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
return sha256_base_finish(desc, out);
}
static struct shash_alg algs[] = { {
.init = sha224_init,
.update = sha2_update,
.final = sha224_final,
.finup = sha224_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.init = sha224_base_init,
.update = sha256_ce_update,
.final = sha256_ce_final,
.finup = sha256_ce_finup,
.descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA224_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
.cra_module = THIS_MODULE,
}
}, {
.init = sha256_init,
.update = sha2_update,
.final = sha256_final,
.finup = sha256_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.init = sha256_base_init,
.update = sha256_ce_update,
.final = sha256_ce_final,
.finup = sha256_ce_finup,
.descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA256_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",


@ -4,4 +4,7 @@
obj-y += octeon-crypto.o
obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o
obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o
obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o


@ -17,7 +17,7 @@
* crypto operations in calls to octeon_crypto_enable/disable in order to make
* sure the state of COP2 isn't corrupted if userspace is also performing
* hardware crypto operations. Allocate the state parameter on the stack.
* Preemption must be disabled to prevent context switches.
* Returns with preemption disabled.
*
* @state: Pointer to state structure to store current COP2 state in.
*
@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
int status;
unsigned long flags;
preempt_disable();
local_irq_save(flags);
status = read_c0_status();
write_c0_status(status | ST0_CU2);
@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state,
else
write_c0_status(read_c0_status() & ~ST0_CU2);
local_irq_restore(flags);
preempt_enable();
}
EXPORT_SYMBOL_GPL(octeon_crypto_disable);
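
With this change the calling convention becomes self-contained; the new
OCTEON SHA modules below all follow this pattern (state on the stack, as
required by the comment above):

	struct octeon_cop2_state state;
	unsigned long flags;

	flags = octeon_crypto_enable(&state);	/* returns with preemption off */
	/* ... feed data to the COP2 hash unit ... */
	octeon_crypto_disable(&state, flags);	/* restores COP2, re-enables preemption */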


@ -5,7 +5,8 @@
*
* Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
*
* MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>.
* MD5/SHA1/SHA256/SHA512 instruction definitions added by
* Aaro Koskinen <aaro.koskinen@iki.fi>.
*
*/
#ifndef __LINUX_OCTEON_CRYPTO_H
@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state,
unsigned long flags);
/*
* Macros needed to implement MD5:
* Macros needed to implement MD5/SHA1/SHA256:
*/
/*
* The index can be 0-1.
* The index can be 0-1 (MD5), 0-2 (SHA1) or 0-3 (SHA256).
*/
#define write_octeon_64bit_hash_dword(value, index) \
do { \
@ -36,7 +37,7 @@ do { \
} while (0)
/*
* The index can be 0-1.
* The index can be 0-1 (MD5), 0-2 (SHA1) or 0-3 (SHA256).
*/
#define read_octeon_64bit_hash_dword(index) \
({ \
@ -72,4 +73,78 @@ do { \
: [rt] "d" (value)); \
} while (0)
/*
* The value is the final block dword (64-bit).
*/
#define octeon_sha1_start(value) \
do { \
__asm__ __volatile__ ( \
"dmtc2 %[rt],0x4057" \
: \
: [rt] "d" (value)); \
} while (0)
/*
* The value is the final block dword (64-bit).
*/
#define octeon_sha256_start(value) \
do { \
__asm__ __volatile__ ( \
"dmtc2 %[rt],0x404f" \
: \
: [rt] "d" (value)); \
} while (0)
/*
* Macros needed to implement SHA512:
*/
/*
* The index can be 0-7.
*/
#define write_octeon_64bit_hash_sha512(value, index) \
do { \
__asm__ __volatile__ ( \
"dmtc2 %[rt],0x0250+" STR(index) \
: \
: [rt] "d" (value)); \
} while (0)
/*
* The index can be 0-7.
*/
#define read_octeon_64bit_hash_sha512(index) \
({ \
u64 __value; \
\
__asm__ __volatile__ ( \
"dmfc2 %[rt],0x0250+" STR(index) \
: [rt] "=d" (__value) \
: ); \
\
__value; \
})
/*
* The index can be 0-14.
*/
#define write_octeon_64bit_block_sha512(value, index) \
do { \
__asm__ __volatile__ ( \
"dmtc2 %[rt],0x0240+" STR(index) \
: \
: [rt] "d" (value)); \
} while (0)
/*
* The value is the final block word (64-bit).
*/
#define octeon_sha512_start(value) \
do { \
__asm__ __volatile__ ( \
"dmtc2 %[rt],0x424f" \
: \
: [rt] "d" (value)); \
} while (0)
#endif /* __LINUX_OCTEON_CRYPTO_H */

View File

@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
avail);
local_bh_disable();
preempt_disable();
flags = octeon_crypto_enable(&state);
octeon_md5_store_hash(mctx);
@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
preempt_enable();
local_bh_enable();
memcpy(mctx->block, data, len);
@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
*p++ = 0x80;
local_bh_disable();
preempt_disable();
flags = octeon_crypto_enable(&state);
octeon_md5_store_hash(mctx);
@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
preempt_enable();
local_bh_enable();
memcpy(out, mctx->hash, sizeof(mctx->hash));
memset(mctx, 0, sizeof(*mctx));


@ -0,0 +1,241 @@
/*
* Cryptographic API.
*
* SHA1 Secure Hash Algorithm.
*
* Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
*
* Based on crypto/sha1_generic.c, which is:
*
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <linux/mm.h>
#include <crypto/sha.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <asm/octeon/octeon.h>
#include <crypto/internal/hash.h>
#include "octeon-crypto.h"
/*
* We pass everything as 64-bit. OCTEON can handle misaligned data.
*/
static void octeon_sha1_store_hash(struct sha1_state *sctx)
{
u64 *hash = (u64 *)sctx->state;
union {
u32 word[2];
u64 dword;
} hash_tail = { { sctx->state[4], } };
write_octeon_64bit_hash_dword(hash[0], 0);
write_octeon_64bit_hash_dword(hash[1], 1);
write_octeon_64bit_hash_dword(hash_tail.dword, 2);
memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0]));
}
static void octeon_sha1_read_hash(struct sha1_state *sctx)
{
u64 *hash = (u64 *)sctx->state;
union {
u32 word[2];
u64 dword;
} hash_tail;
hash[0] = read_octeon_64bit_hash_dword(0);
hash[1] = read_octeon_64bit_hash_dword(1);
hash_tail.dword = read_octeon_64bit_hash_dword(2);
sctx->state[4] = hash_tail.word[0];
memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
}
static void octeon_sha1_transform(const void *_block)
{
const u64 *block = _block;
write_octeon_64bit_block_dword(block[0], 0);
write_octeon_64bit_block_dword(block[1], 1);
write_octeon_64bit_block_dword(block[2], 2);
write_octeon_64bit_block_dword(block[3], 3);
write_octeon_64bit_block_dword(block[4], 4);
write_octeon_64bit_block_dword(block[5], 5);
write_octeon_64bit_block_dword(block[6], 6);
octeon_sha1_start(block[7]);
}
static int octeon_sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA1_H0;
sctx->state[1] = SHA1_H1;
sctx->state[2] = SHA1_H2;
sctx->state[3] = SHA1_H3;
sctx->state[4] = SHA1_H4;
sctx->count = 0;
return 0;
}
static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
unsigned int len)
{
unsigned int partial;
unsigned int done;
const u8 *src;
partial = sctx->count % SHA1_BLOCK_SIZE;
sctx->count += len;
done = 0;
src = data;
if ((partial + len) >= SHA1_BLOCK_SIZE) {
if (partial) {
done = -partial;
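/*
 * Negative bias: after topping up the buffer, done + SHA1_BLOCK_SIZE
 * equals the number of input bytes consumed, keeping src = data + done
 * correct.
 */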
memcpy(sctx->buffer + partial, data,
done + SHA1_BLOCK_SIZE);
src = sctx->buffer;
}
do {
octeon_sha1_transform(src);
done += SHA1_BLOCK_SIZE;
src = data + done;
} while (done + SHA1_BLOCK_SIZE <= len);
partial = 0;
}
memcpy(sctx->buffer + partial, src, len - done);
}
static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
/*
* Small updates never reach the crypto engine, so the generic sha1 is
* faster because of the heavyweight octeon_crypto_enable() /
* octeon_crypto_disable().
*/
if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return crypto_sha1_update(desc, data, len);
flags = octeon_crypto_enable(&state);
octeon_sha1_store_hash(sctx);
__octeon_sha1_update(sctx, data, len);
octeon_sha1_read_hash(sctx);
octeon_crypto_disable(&state, flags);
return 0;
}
static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
static const u8 padding[64] = { 0x80, };
struct octeon_cop2_state state;
__be32 *dst = (__be32 *)out;
unsigned int pad_len;
unsigned long flags;
unsigned int index;
__be64 bits;
int i;
/* Save number of bits. */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64. */
index = sctx->count & 0x3f;
pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
flags = octeon_crypto_enable(&state);
octeon_sha1_store_hash(sctx);
__octeon_sha1_update(sctx, padding, pad_len);
/* Append length (before padding). */
__octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
octeon_sha1_read_hash(sctx);
octeon_crypto_disable(&state, flags);
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int octeon_sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int octeon_sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg octeon_sha1_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = octeon_sha1_init,
.update = octeon_sha1_update,
.final = octeon_sha1_final,
.export = octeon_sha1_export,
.import = octeon_sha1_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "octeon-sha1",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init octeon_sha1_mod_init(void)
{
if (!octeon_has_crypto())
return -ENOTSUPP;
return crypto_register_shash(&octeon_sha1_alg);
}
static void __exit octeon_sha1_mod_fini(void)
{
crypto_unregister_shash(&octeon_sha1_alg);
}
module_init(octeon_sha1_mod_init);
module_exit(octeon_sha1_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)");
MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");

View File

@ -0,0 +1,280 @@
/*
* Cryptographic API.
*
* SHA-224 and SHA-256 Secure Hash Algorithm.
*
* Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
*
* Based on crypto/sha256_generic.c, which is:
*
* Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
* SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <linux/mm.h>
#include <crypto/sha.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <asm/octeon/octeon.h>
#include <crypto/internal/hash.h>
#include "octeon-crypto.h"
/*
* We pass everything as 64-bit. OCTEON can handle misaligned data.
*/
static void octeon_sha256_store_hash(struct sha256_state *sctx)
{
u64 *hash = (u64 *)sctx->state;
write_octeon_64bit_hash_dword(hash[0], 0);
write_octeon_64bit_hash_dword(hash[1], 1);
write_octeon_64bit_hash_dword(hash[2], 2);
write_octeon_64bit_hash_dword(hash[3], 3);
}
static void octeon_sha256_read_hash(struct sha256_state *sctx)
{
u64 *hash = (u64 *)sctx->state;
hash[0] = read_octeon_64bit_hash_dword(0);
hash[1] = read_octeon_64bit_hash_dword(1);
hash[2] = read_octeon_64bit_hash_dword(2);
hash[3] = read_octeon_64bit_hash_dword(3);
}
static void octeon_sha256_transform(const void *_block)
{
const u64 *block = _block;
write_octeon_64bit_block_dword(block[0], 0);
write_octeon_64bit_block_dword(block[1], 1);
write_octeon_64bit_block_dword(block[2], 2);
write_octeon_64bit_block_dword(block[3], 3);
write_octeon_64bit_block_dword(block[4], 4);
write_octeon_64bit_block_dword(block[5], 5);
write_octeon_64bit_block_dword(block[6], 6);
octeon_sha256_start(block[7]);
}
static int octeon_sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int octeon_sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
unsigned int len)
{
unsigned int partial;
unsigned int done;
const u8 *src;
partial = sctx->count % SHA256_BLOCK_SIZE;
sctx->count += len;
done = 0;
src = data;
if ((partial + len) >= SHA256_BLOCK_SIZE) {
if (partial) {
done = -partial;
memcpy(sctx->buf + partial, data,
done + SHA256_BLOCK_SIZE);
src = sctx->buf;
}
do {
octeon_sha256_transform(src);
done += SHA256_BLOCK_SIZE;
src = data + done;
} while (done + SHA256_BLOCK_SIZE <= len);
partial = 0;
}
memcpy(sctx->buf + partial, src, len - done);
}
static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
/*
* Small updates never reach the crypto engine, so the generic sha256 is
* faster because of the heavyweight octeon_crypto_enable() /
* octeon_crypto_disable().
*/
if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_update(desc, data, len);
flags = octeon_crypto_enable(&state);
octeon_sha256_store_hash(sctx);
__octeon_sha256_update(sctx, data, len);
octeon_sha256_read_hash(sctx);
octeon_crypto_disable(&state, flags);
return 0;
}
static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
static const u8 padding[64] = { 0x80, };
struct octeon_cop2_state state;
__be32 *dst = (__be32 *)out;
unsigned int pad_len;
unsigned long flags;
unsigned int index;
__be64 bits;
int i;
/* Save number of bits. */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64. */
index = sctx->count & 0x3f;
pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
flags = octeon_crypto_enable(&state);
octeon_sha256_store_hash(sctx);
__octeon_sha256_update(sctx, padding, pad_len);
/* Append length (before padding). */
__octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
octeon_sha256_read_hash(sctx);
octeon_crypto_disable(&state, flags);
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
octeon_sha256_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
static int octeon_sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int octeon_sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg octeon_sha256_algs[2] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = octeon_sha256_init,
.update = octeon_sha256_update,
.final = octeon_sha256_final,
.export = octeon_sha256_export,
.import = octeon_sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name= "octeon-sha256",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = octeon_sha224_init,
.update = octeon_sha256_update,
.final = octeon_sha224_final,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name= "octeon-sha224",
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init octeon_sha256_mod_init(void)
{
if (!octeon_has_crypto())
return -ENOTSUPP;
return crypto_register_shashes(octeon_sha256_algs,
ARRAY_SIZE(octeon_sha256_algs));
}
static void __exit octeon_sha256_mod_fini(void)
{
crypto_unregister_shashes(octeon_sha256_algs,
ARRAY_SIZE(octeon_sha256_algs));
}
module_init(octeon_sha256_mod_init);
module_exit(octeon_sha256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");


@ -0,0 +1,277 @@
/*
* Cryptographic API.
*
* SHA-512 and SHA-384 Secure Hash Algorithm.
*
* Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
*
* Based on crypto/sha512_generic.c, which is:
*
* Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*/
#include <linux/mm.h>
#include <crypto/sha.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <asm/octeon/octeon.h>
#include <crypto/internal/hash.h>
#include "octeon-crypto.h"
/*
* We pass everything as 64-bit. OCTEON can handle misaligned data.
*/
static void octeon_sha512_store_hash(struct sha512_state *sctx)
{
write_octeon_64bit_hash_sha512(sctx->state[0], 0);
write_octeon_64bit_hash_sha512(sctx->state[1], 1);
write_octeon_64bit_hash_sha512(sctx->state[2], 2);
write_octeon_64bit_hash_sha512(sctx->state[3], 3);
write_octeon_64bit_hash_sha512(sctx->state[4], 4);
write_octeon_64bit_hash_sha512(sctx->state[5], 5);
write_octeon_64bit_hash_sha512(sctx->state[6], 6);
write_octeon_64bit_hash_sha512(sctx->state[7], 7);
}
static void octeon_sha512_read_hash(struct sha512_state *sctx)
{
sctx->state[0] = read_octeon_64bit_hash_sha512(0);
sctx->state[1] = read_octeon_64bit_hash_sha512(1);
sctx->state[2] = read_octeon_64bit_hash_sha512(2);
sctx->state[3] = read_octeon_64bit_hash_sha512(3);
sctx->state[4] = read_octeon_64bit_hash_sha512(4);
sctx->state[5] = read_octeon_64bit_hash_sha512(5);
sctx->state[6] = read_octeon_64bit_hash_sha512(6);
sctx->state[7] = read_octeon_64bit_hash_sha512(7);
}
static void octeon_sha512_transform(const void *_block)
{
const u64 *block = _block;
write_octeon_64bit_block_sha512(block[0], 0);
write_octeon_64bit_block_sha512(block[1], 1);
write_octeon_64bit_block_sha512(block[2], 2);
write_octeon_64bit_block_sha512(block[3], 3);
write_octeon_64bit_block_sha512(block[4], 4);
write_octeon_64bit_block_sha512(block[5], 5);
write_octeon_64bit_block_sha512(block[6], 6);
write_octeon_64bit_block_sha512(block[7], 7);
write_octeon_64bit_block_sha512(block[8], 8);
write_octeon_64bit_block_sha512(block[9], 9);
write_octeon_64bit_block_sha512(block[10], 10);
write_octeon_64bit_block_sha512(block[11], 11);
write_octeon_64bit_block_sha512(block[12], 12);
write_octeon_64bit_block_sha512(block[13], 13);
write_octeon_64bit_block_sha512(block[14], 14);
octeon_sha512_start(block[15]);
}
static int octeon_sha512_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int octeon_sha384_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
unsigned int len)
{
unsigned int part_len;
unsigned int index;
unsigned int i;
/* Compute number of bytes mod 128. */
index = sctx->count[0] % SHA512_BLOCK_SIZE;
/* Update number of bytes. */
if ((sctx->count[0] += len) < len)
sctx->count[1]++;
part_len = SHA512_BLOCK_SIZE - index;
/* Transform as many times as possible. */
if (len >= part_len) {
memcpy(&sctx->buf[index], data, part_len);
octeon_sha512_transform(sctx->buf);
for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
i += SHA512_BLOCK_SIZE)
octeon_sha512_transform(&data[i]);
index = 0;
} else {
i = 0;
}
/* Buffer remaining input. */
memcpy(&sctx->buf[index], &data[i], len - i);
}
static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
struct octeon_cop2_state state;
unsigned long flags;
/*
* Small updates never reach the crypto engine, so the generic sha512 is
* faster because of the heavyweight octeon_crypto_enable() /
* octeon_crypto_disable().
*/
if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
return crypto_sha512_update(desc, data, len);
flags = octeon_crypto_enable(&state);
octeon_sha512_store_hash(sctx);
__octeon_sha512_update(sctx, data, len);
octeon_sha512_read_hash(sctx);
octeon_crypto_disable(&state, flags);
return 0;
}
static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
static u8 padding[128] = { 0x80, };
struct octeon_cop2_state state;
__be64 *dst = (__be64 *)hash;
unsigned int pad_len;
unsigned long flags;
unsigned int index;
__be64 bits[2];
int i;
/* Save number of bits. */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128. */
index = sctx->count[0] & 0x7f;
pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
flags = octeon_crypto_enable(&state);
octeon_sha512_store_hash(sctx);
__octeon_sha512_update(sctx, padding, pad_len);
/* Append length (before padding). */
__octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
octeon_sha512_read_hash(sctx);
octeon_crypto_disable(&state, flags);
/* Store state in digest. */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(struct sha512_state));
return 0;
}
static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
{
u8 D[64];
octeon_sha512_final(desc, D);
memcpy(hash, D, 48);
memzero_explicit(D, 64);
return 0;
}
static struct shash_alg octeon_sha512_algs[2] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = octeon_sha512_init,
.update = octeon_sha512_update,
.final = octeon_sha512_final,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name= "octeon-sha512",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = octeon_sha384_init,
.update = octeon_sha512_update,
.final = octeon_sha384_final,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name= "octeon-sha384",
.cra_priority = OCTEON_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init octeon_sha512_mod_init(void)
{
if (!octeon_has_crypto())
return -ENOTSUPP;
return crypto_register_shashes(octeon_sha512_algs,
ARRAY_SIZE(octeon_sha512_algs));
}
static void __exit octeon_sha512_mod_fini(void)
{
crypto_unregister_shashes(octeon_sha512_algs,
ARRAY_SIZE(octeon_sha512_algs));
}
module_init(octeon_sha512_mod_init);
module_exit(octeon_sha512_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)");
MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");


@ -1258,20 +1258,6 @@
#define M2M_SRCID_REG(x) ((x) * 0x40 + 0x14)
#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18)
/*************************************************************************
* _REG relative to RSET_RNG
*************************************************************************/
#define RNG_CTRL 0x00
#define RNG_EN (1 << 0)
#define RNG_STAT 0x04
#define RNG_AVAIL_MASK (0xff000000)
#define RNG_DATA 0x08
#define RNG_THRES 0x0c
#define RNG_MASK 0x10
/*************************************************************************
* _REG relative to RSET_SPI
*************************************************************************/


@ -4,6 +4,14 @@
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
md5-ppc-y := md5-asm.o md5-glue.o
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o


@ -0,0 +1,351 @@
/*
* Fast AES implementation for SPE instruction set (PPC)
*
* This code makes use of the SPE SIMD instruction set as defined in
* http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
* Implementation is based on optimization guide notes from
* http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
#define EAD(in, bpos) \
rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
#define DAD(in, bpos) \
rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
#define LWH(out, off) \
evlwwsplat out,off(rT0); /* load word high */
#define LWL(out, off) \
lwz out,off(rT0); /* load word low */
#define LBZ(out, tab, off) \
lbz out,off(tab); /* load byte */
#define LAH(out, in, bpos, off) \
EAD(in, bpos) /* calc addr + load word high */ \
LWH(out, off)
#define LAL(out, in, bpos, off) \
EAD(in, bpos) /* calc addr + load word low */ \
LWL(out, off)
#define LAE(out, in, bpos) \
EAD(in, bpos) /* calc addr + load enc byte */ \
LBZ(out, rT0, 8)
#define LBE(out) \
LBZ(out, rT0, 8) /* load enc byte */
#define LAD(out, in, bpos) \
DAD(in, bpos) /* calc addr + load dec byte */ \
LBZ(out, rT1, 0)
#define LBD(out) \
LBZ(out, rT1, 0)
/*
* ppc_encrypt_block: The central encryption function for a single 16-byte
* block. It does no stack handling or register saving to support fast calls
* via bl/blr. It expects that the caller has pre-xored the input data with the
* first 4 words of the encryption key into rD0-rD3. The pointer/counter
* registers (rT0, rKP, CTR) must also have been set up beforehand. Output is
* stored in rD0-rD3 and rW0-rW3 and the caller must execute a final xor on
* the output registers. All working registers rD0-rD3 & rW0-rW7 are
* overwritten during processing.
*
*/
_GLOBAL(ppc_encrypt_block)
LAH(rW4, rD1, 2, 4)
LAH(rW6, rD0, 3, 0)
LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
LAH(rW0, rD3, 0, 12)
LAL(rW0, rD0, 0, 12)
LAH(rW1, rD1, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAL(rW3, rD1, 1, 8)
LAL(rW4, rD2, 2, 4)
LAL(rW6, rD1, 3, 0)
LAH(rW5, rD3, 2, 4)
LAL(rW5, rD0, 2, 4)
LAH(rW7, rD2, 3, 0)
evldw rD1,16(rKP)
EAD(rD3, 3)
evxor rW2,rW2,rW4
LWL(rW7, 0)
evxor rW2,rW2,rW6
EAD(rD2, 0)
evxor rD1,rD1,rW2
LWL(rW1, 12)
evxor rD1,rD1,rW0
evldw rD3,24(rKP)
evmergehi rD0,rD0,rD1
EAD(rD1, 2)
evxor rW3,rW3,rW5
LWH(rW4, 4)
evxor rW3,rW3,rW7
EAD(rD0, 3)
evxor rD3,rD3,rW3
LWH(rW6, 0)
evxor rD3,rD3,rW1
EAD(rD0, 1)
evmergehi rD2,rD2,rD3
LWH(rW3, 8)
LAH(rW0, rD3, 0, 12)
LAL(rW0, rD0, 0, 12)
LAH(rW1, rD1, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAL(rW3, rD1, 1, 8)
LAL(rW4, rD2, 2, 4)
LAL(rW6, rD1, 3, 0)
LAH(rW5, rD3, 2, 4)
LAL(rW5, rD0, 2, 4)
LAH(rW7, rD2, 3, 0)
evldw rD1,32(rKP)
EAD(rD3, 3)
evxor rW2,rW2,rW4
LWL(rW7, 0)
evxor rW2,rW2,rW6
EAD(rD2, 0)
evxor rD1,rD1,rW2
LWL(rW1, 12)
evxor rD1,rD1,rW0
evldw rD3,40(rKP)
evmergehi rD0,rD0,rD1
EAD(rD1, 2)
evxor rW3,rW3,rW5
LWH(rW4, 4)
evxor rW3,rW3,rW7
EAD(rD0, 3)
evxor rD3,rD3,rW3
LWH(rW6, 0)
evxor rD3,rD3,rW1
EAD(rD0, 1)
evmergehi rD2,rD2,rD3
LWH(rW3, 8)
addi rKP,rKP,32
bdnz ppc_encrypt_block_loop
LAH(rW0, rD3, 0, 12)
LAL(rW0, rD0, 0, 12)
LAH(rW1, rD1, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAL(rW3, rD1, 1, 8)
LAL(rW4, rD2, 2, 4)
LAH(rW5, rD3, 2, 4)
LAL(rW6, rD1, 3, 0)
LAL(rW5, rD0, 2, 4)
LAH(rW7, rD2, 3, 0)
evldw rD1,16(rKP)
EAD(rD3, 3)
evxor rW2,rW2,rW4
LWL(rW7, 0)
evxor rW2,rW2,rW6
EAD(rD2, 0)
evxor rD1,rD1,rW2
LWL(rW1, 12)
evxor rD1,rD1,rW0
evldw rD3,24(rKP)
evmergehi rD0,rD0,rD1
EAD(rD1, 0)
evxor rW3,rW3,rW5
LBE(rW2)
evxor rW3,rW3,rW7
EAD(rD0, 1)
evxor rD3,rD3,rW3
LBE(rW6)
evxor rD3,rD3,rW1
EAD(rD0, 0)
evmergehi rD2,rD2,rD3
LBE(rW1)
LAE(rW0, rD3, 0)
LAE(rW1, rD0, 0)
LAE(rW4, rD2, 1)
LAE(rW5, rD3, 1)
LAE(rW3, rD2, 0)
LAE(rW7, rD1, 1)
rlwimi rW0,rW4,8,16,23
rlwimi rW1,rW5,8,16,23
LAE(rW4, rD1, 2)
LAE(rW5, rD2, 2)
rlwimi rW2,rW6,8,16,23
rlwimi rW3,rW7,8,16,23
LAE(rW6, rD3, 2)
LAE(rW7, rD0, 2)
rlwimi rW0,rW4,16,8,15
rlwimi rW1,rW5,16,8,15
LAE(rW4, rD0, 3)
LAE(rW5, rD1, 3)
rlwimi rW2,rW6,16,8,15
lwz rD0,32(rKP)
rlwimi rW3,rW7,16,8,15
lwz rD1,36(rKP)
LAE(rW6, rD2, 3)
LAE(rW7, rD3, 3)
rlwimi rW0,rW4,24,0,7
lwz rD2,40(rKP)
rlwimi rW1,rW5,24,0,7
lwz rD3,44(rKP)
rlwimi rW2,rW6,24,0,7
rlwimi rW3,rW7,24,0,7
blr
/*
* ppc_decrypt_block: The central decryption function for a single 16-byte
* block. It does no stack handling or register saving to support fast calls
* via bl/blr. It expects that the caller has pre-xored the input data with the
* first 4 words of the encryption key into rD0-rD3. The pointer/counter
* registers (rT0, rKP, CTR) must also have been set up beforehand. Output is
* stored in rD0-rD3 and rW0-rW3 and the caller must execute a final xor on
* the output registers. All working registers rD0-rD3 & rW0-rW7 are
* overwritten during processing.
*
*/
_GLOBAL(ppc_decrypt_block)
LAH(rW0, rD1, 0, 12)
LAH(rW6, rD0, 3, 0)
LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
LAH(rW1, rD3, 0, 12)
LAL(rW0, rD2, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAH(rW4, rD3, 2, 4)
LAL(rW4, rD0, 2, 4)
LAL(rW6, rD1, 3, 0)
LAH(rW5, rD1, 2, 4)
LAH(rW7, rD2, 3, 0)
LAL(rW7, rD3, 3, 0)
LAL(rW3, rD1, 1, 8)
evldw rD1,16(rKP)
EAD(rD0, 0)
evxor rW4,rW4,rW6
LWL(rW1, 12)
evxor rW0,rW0,rW4
EAD(rD2, 2)
evxor rW0,rW0,rW2
LWL(rW5, 4)
evxor rD1,rD1,rW0
evldw rD3,24(rKP)
evmergehi rD0,rD0,rD1
EAD(rD1, 0)
evxor rW3,rW3,rW7
LWH(rW0, 12)
evxor rW3,rW3,rW1
EAD(rD0, 3)
evxor rD3,rD3,rW3
LWH(rW6, 0)
evxor rD3,rD3,rW5
EAD(rD0, 1)
evmergehi rD2,rD2,rD3
LWH(rW3, 8)
LAH(rW1, rD3, 0, 12)
LAL(rW0, rD2, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAH(rW4, rD3, 2, 4)
LAL(rW4, rD0, 2, 4)
LAL(rW6, rD1, 3, 0)
LAH(rW5, rD1, 2, 4)
LAH(rW7, rD2, 3, 0)
LAL(rW7, rD3, 3, 0)
LAL(rW3, rD1, 1, 8)
evldw rD1,32(rKP)
EAD(rD0, 0)
evxor rW4,rW4,rW6
LWL(rW1, 12)
evxor rW0,rW0,rW4
EAD(rD2, 2)
evxor rW0,rW0,rW2
LWL(rW5, 4)
evxor rD1,rD1,rW0
evldw rD3,40(rKP)
evmergehi rD0,rD0,rD1
EAD(rD1, 0)
evxor rW3,rW3,rW7
LWH(rW0, 12)
evxor rW3,rW3,rW1
EAD(rD0, 3)
evxor rD3,rD3,rW3
LWH(rW6, 0)
evxor rD3,rD3,rW5
EAD(rD0, 1)
evmergehi rD2,rD2,rD3
LWH(rW3, 8)
addi rKP,rKP,32
bdnz ppc_decrypt_block_loop
LAH(rW1, rD3, 0, 12)
LAL(rW0, rD2, 0, 12)
LAH(rW2, rD2, 1, 8)
LAL(rW2, rD3, 1, 8)
LAH(rW4, rD3, 2, 4)
LAL(rW4, rD0, 2, 4)
LAL(rW6, rD1, 3, 0)
LAH(rW5, rD1, 2, 4)
LAH(rW7, rD2, 3, 0)
LAL(rW7, rD3, 3, 0)
LAL(rW3, rD1, 1, 8)
evldw rD1,16(rKP)
EAD(rD0, 0)
evxor rW4,rW4,rW6
LWL(rW1, 12)
evxor rW0,rW0,rW4
EAD(rD2, 2)
evxor rW0,rW0,rW2
LWL(rW5, 4)
evxor rD1,rD1,rW0
evldw rD3,24(rKP)
evmergehi rD0,rD0,rD1
DAD(rD1, 0)
evxor rW3,rW3,rW7
LBD(rW0)
evxor rW3,rW3,rW1
DAD(rD0, 1)
evxor rD3,rD3,rW3
LBD(rW6)
evxor rD3,rD3,rW5
DAD(rD0, 0)
evmergehi rD2,rD2,rD3
LBD(rW3)
LAD(rW2, rD3, 0)
LAD(rW1, rD2, 0)
LAD(rW4, rD2, 1)
LAD(rW5, rD3, 1)
LAD(rW7, rD1, 1)
rlwimi rW0,rW4,8,16,23
rlwimi rW1,rW5,8,16,23
LAD(rW4, rD3, 2)
LAD(rW5, rD0, 2)
rlwimi rW2,rW6,8,16,23
rlwimi rW3,rW7,8,16,23
LAD(rW6, rD1, 2)
LAD(rW7, rD2, 2)
rlwimi rW0,rW4,16,8,15
rlwimi rW1,rW5,16,8,15
LAD(rW4, rD0, 3)
LAD(rW5, rD1, 3)
rlwimi rW2,rW6,16,8,15
lwz rD0,32(rKP)
rlwimi rW3,rW7,16,8,15
lwz rD1,36(rKP)
LAD(rW6, rD2, 3)
LAD(rW7, rD3, 3)
rlwimi rW0,rW4,24,0,7
lwz rD2,40(rKP)
rlwimi rW1,rW5,24,0,7
lwz rD3,44(rKP)
rlwimi rW2,rW6,24,0,7
rlwimi rW3,rW7,24,0,7
blr


@ -0,0 +1,512 @@
/*
* Glue code for AES implementation for SPE instructions (PPC)
*
* Based on the generic implementation. The assembler module takes care
* of the SPE registers so it can run from interrupt context.
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/aes.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
* between preempt_disable() and preempt_enable(). e500 cores can issue two
* instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
* bit unit (SU2). One of these can be a memory access that is executed via
* a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
* 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
* will need an estimated maximum of 20,000 cycles. Headroom for cache misses
* included. Even with the low end model clocked at 667 MHz this equals a
* critical time window of less than 30us. The value has been chosen to
* process a 512 byte disk block in one run or a large 1400 byte IPsec
* network packet in two runs.
*
*/
#define MAX_BYTES 768
struct ppc_aes_ctx {
u32 key_enc[AES_MAX_KEYLENGTH_U32];
u32 key_dec[AES_MAX_KEYLENGTH_U32];
u32 rounds;
};
struct ppc_xts_ctx {
u32 key_enc[AES_MAX_KEYLENGTH_U32];
u32 key_dec[AES_MAX_KEYLENGTH_U32];
u32 key_twk[AES_MAX_KEYLENGTH_U32];
u32 rounds;
};
extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
u32 bytes);
extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
u32 bytes);
extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
u32 bytes, u8 *iv);
extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
u32 bytes, u8 *iv);
extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
u32 bytes, u8 *iv);
extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
u32 bytes, u8 *iv, u32 *key_twk);
extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
u32 bytes, u8 *iv, u32 *key_twk);
extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
unsigned int key_len);
static void spe_begin(void)
{
/* disable preemption and save users SPE registers if required */
preempt_disable();
enable_kernel_spe();
}
static void spe_end(void)
{
/* reenable preemption */
preempt_enable();
}
static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
switch (key_len) {
case AES_KEYSIZE_128:
ctx->rounds = 4;
ppc_expand_key_128(ctx->key_enc, in_key);
break;
case AES_KEYSIZE_192:
ctx->rounds = 5;
ppc_expand_key_192(ctx->key_enc, in_key);
break;
case AES_KEYSIZE_256:
ctx->rounds = 6;
ppc_expand_key_256(ctx->key_enc, in_key);
break;
}
ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
return 0;
}
static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
key_len >>= 1;
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
switch (key_len) {
case AES_KEYSIZE_128:
ctx->rounds = 4;
ppc_expand_key_128(ctx->key_enc, in_key);
ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
break;
case AES_KEYSIZE_192:
ctx->rounds = 5;
ppc_expand_key_192(ctx->key_enc, in_key);
ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
break;
case AES_KEYSIZE_256:
ctx->rounds = 6;
ppc_expand_key_256(ctx->key_enc, in_key);
ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
break;
}
ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
return 0;
}
static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
spe_begin();
ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
spe_end();
}
static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
spe_begin();
ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
spe_end();
}
static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
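/*
 * Illustrative values for the split above: walk.nbytes = 1000 gives
 * ubytes = 1000 - 768 = 232, so 768 bytes are processed now and 232
 * are handed back to the walker; walk.nbytes = 100 gives
 * ubytes = 100 & 15 = 4, so only the 96 bytes of whole AES blocks
 * are processed.
 */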
spe_begin();
ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, ctx->rounds, nbytes);
spe_end();
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
spe_begin();
ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, ctx->rounds, nbytes);
spe_end();
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
spe_begin();
ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, ctx->rounds, nbytes, walk.iv);
spe_end();
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
spe_begin();
ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, ctx->rounds, nbytes, walk.iv);
spe_end();
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int pbytes, ubytes;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
while ((pbytes = walk.nbytes)) {
pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
pbytes = pbytes == nbytes ?
nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
ubytes = walk.nbytes - pbytes;
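/*
 * Illustrative values: for walk.nbytes = 1000, pbytes is capped to 768
 * (already a block multiple, and 768 != nbytes) while ubytes = 232 goes
 * back to the walker. On the last pass pbytes == nbytes, so a trailing
 * partial block is handed to the assembler as well, as CTR permits.
 */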
spe_begin();
ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, ctx->rounds, pbytes , walk.iv);
spe_end();
nbytes -= pbytes;
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
u32 *twk;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
twk = ctx->key_twk;
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
spe_begin();
ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
spe_end();
twk = NULL;
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
unsigned int ubytes;
int err;
u32 *twk;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
twk = ctx->key_twk;
while ((nbytes = walk.nbytes)) {
ubytes = nbytes > MAX_BYTES ?
nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
nbytes -= ubytes;
spe_begin();
ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
spe_end();
twk = NULL;
err = blkcipher_walk_done(desc, &walk, ubytes);
}
return err;
}
/*
* Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
* because the e500 platform can handle unaligned reads/writes very efficiently.
* This improves IPsec throughput by another few percent. Additionally we assume
* that the AES context is always aligned to at least 8 bytes because it is
* created with kmalloc() in the crypto infrastructure.
*
*/
static struct crypto_alg aes_algs[] = { {
.cra_name = "aes",
.cra_driver_name = "aes-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ppc_aes_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = ppc_aes_setkey,
.cia_encrypt = ppc_aes_encrypt,
.cia_decrypt = ppc_aes_decrypt
}
}
}, {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ppc_aes_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ppc_aes_setkey,
.encrypt = ppc_ecb_encrypt,
.decrypt = ppc_ecb_decrypt,
}
}
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ppc_aes_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ppc_aes_setkey,
.encrypt = ppc_cbc_encrypt,
.decrypt = ppc_cbc_decrypt,
}
}
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct ppc_aes_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ppc_aes_setkey,
.encrypt = ppc_ctr_crypt,
.decrypt = ppc_ctr_crypt,
}
}
}, {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ppc_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE * 2,
.max_keysize = AES_MAX_KEY_SIZE * 2,
.ivsize = AES_BLOCK_SIZE,
.setkey = ppc_xts_setkey,
.encrypt = ppc_xts_encrypt,
.decrypt = ppc_xts_decrypt,
}
}
} };
static int __init ppc_aes_mod_init(void)
{
return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
static void __exit ppc_aes_mod_fini(void)
{
crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
module_init(ppc_aes_mod_init);
module_exit(ppc_aes_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("aes-ppc-spe");


@@ -0,0 +1,283 @@
/*
* Key handling functions for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#ifdef __BIG_ENDIAN__
#define LOAD_KEY(d, s, off) \
lwz d,off(s);
#else
#define LOAD_KEY(d, s, off) \
li r0,off; \
lwbrx d,s,r0;
#endif
#define INITIALIZE_KEY \
stwu r1,-32(r1); /* create stack frame */ \
stw r14,8(r1); /* save registers */ \
stw r15,12(r1); \
stw r16,16(r1);
#define FINALIZE_KEY \
lwz r14,8(r1); /* restore registers */ \
lwz r15,12(r1); \
lwz r16,16(r1); \
xor r5,r5,r5; /* clear sensitive data */ \
xor r6,r6,r6; \
xor r7,r7,r7; \
xor r8,r8,r8; \
xor r9,r9,r9; \
xor r10,r10,r10; \
xor r11,r11,r11; \
xor r12,r12,r12; \
addi r1,r1,32; /* cleanup stack */
#define LS_BOX(r, t1, t2) \
lis t2,PPC_AES_4K_ENCTAB@h; \
ori t2,t2,PPC_AES_4K_ENCTAB@l; \
rlwimi t2,r,4,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,0,24,31; \
rlwimi t2,r,28,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,8,16,23; \
rlwimi t2,r,20,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,16,8,15; \
rlwimi t2,r,12,20,27; \
lbz t1,8(t2); \
rlwimi r,t1,24,0,7;
#define GF8_MUL(out, in, t1, t2) \
lis t1,0x8080; /* multiplication in GF8 */ \
ori t1,t1,0x8080; \
and t1,t1,in; \
srwi t1,t1,7; \
mulli t1,t1,0x1b; \
lis t2,0x7f7f; \
ori t2,t2,0x7f7f; \
and t2,t2,in; \
slwi t2,t2,1; \
xor out,t1,t2;
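/*
 * GF8_MUL is the AES "xtime" step, multiplying four packed bytes by 2 in
 * GF(2^8) at once. Rough C equivalent:
 *   out = ((in & 0x7f7f7f7f) << 1) ^ (((in & 0x80808080) >> 7) * 0x1b);
 */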
/*
* ppc_expand_key_128(u32 *key_enc, const u8 *key)
*
* Expand 128 bit key into 176 bytes encryption key. It consists of
* key itself plus 10 rounds with 16 bytes each
*
*/
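/*
 * In general the expanded key occupies 16 * (rounds + 1) bytes, hence
 * AES-128: 16 * 11 = 176, AES-192: 16 * 13 = 208, AES-256: 16 * 15 = 240.
 */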
_GLOBAL(ppc_expand_key_128)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
stw r5,0(r3) /* key[0..3] = input data */
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
li r16,10 /* 10 expansion rounds */
lis r0,0x0100 /* RCO(1) */
ppc_expand_128_loop:
addi r3,r3,16
mr r14,r8 /* apply LS_BOX to 4th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor next 4 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
stw r5,0(r3) /* store next 4 keys */
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
subi r16,r16,1
cmpwi r16,0
bt eq,ppc_expand_128_end
b ppc_expand_128_loop
ppc_expand_128_end:
FINALIZE_KEY
blr
/*
* ppc_expand_key_192(u32 *key_enc, const u8 *key)
*
* Expand 192 bit key into 208 bytes encryption key. It consists of key
* itself plus 12 rounds with 16 bytes each
*
*/
_GLOBAL(ppc_expand_key_192)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
LOAD_KEY(r9,r4,16)
LOAD_KEY(r10,r4,20)
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
stw r9,16(r3)
stw r10,20(r3)
li r16,8 /* 8 expansion rounds */
lis r0,0x0100 /* RCO(1) */
ppc_expand_192_loop:
addi r3,r3,24
mr r14,r10 /* apply LS_BOX to 6th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor next 6 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
xor r9,r9,r8
xor r10,r10,r9
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
subi r16,r16,1
cmpwi r16,0 /* last round early kick out */
bt eq,ppc_expand_192_end
stw r9,16(r3)
stw r10,20(r3)
GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
b ppc_expand_192_loop
ppc_expand_192_end:
FINALIZE_KEY
blr
/*
* ppc_expand_key_256(u32 *key_enc, const u8 *key)
*
* Expand 256 bit key into 240 bytes encryption key. It consists of key
* itself plus 14 rounds with 16 bytes each
*
*/
_GLOBAL(ppc_expand_key_256)
INITIALIZE_KEY
LOAD_KEY(r5,r4,0)
LOAD_KEY(r6,r4,4)
LOAD_KEY(r7,r4,8)
LOAD_KEY(r8,r4,12)
LOAD_KEY(r9,r4,16)
LOAD_KEY(r10,r4,20)
LOAD_KEY(r11,r4,24)
LOAD_KEY(r12,r4,28)
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
stw r9,16(r3)
stw r10,20(r3)
stw r11,24(r3)
stw r12,28(r3)
li r16,7 /* 7 expansion rounds */
lis r0,0x0100 /* RCO(1) */
ppc_expand_256_loop:
addi r3,r3,32
mr r14,r12 /* apply LS_BOX to 8th temp */
rotlwi r14,r14,8
LS_BOX(r14, r15, r4)
xor r14,r14,r0
xor r5,r5,r14 /* xor 4 keys */
xor r6,r6,r5
xor r7,r7,r6
xor r8,r8,r7
mr r14,r8
LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
xor r9,r9,r14 /* xor 4 keys */
xor r10,r10,r9
xor r11,r11,r10
xor r12,r12,r11
stw r5,0(r3)
stw r6,4(r3)
stw r7,8(r3)
stw r8,12(r3)
subi r16,r16,1
cmpwi r16,0 /* last round early kick out */
bt eq,ppc_expand_256_end
stw r9,16(r3)
stw r10,20(r3)
stw r11,24(r3)
stw r12,28(r3)
GF8_MUL(r0, r0, r4, r14)
b ppc_expand_256_loop
ppc_expand_256_end:
FINALIZE_KEY
blr
/*
* ppc_generate_decrypt_key: derive decryption key from encryption key
* number of bytes to handle are calculated from length of key (16/24/32)
*
*/
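/*
 * The per word transform below is InvMixColumns built from a GF8_MUL
 * doubling chain: with w2 = 2*w, w4 = 4*w and w8 = 8*w (bytewise in
 * GF(2^8)) the stored value is
 *   14*w ^ rotr24(11*w) ^ rotr16(13*w) ^ rotr8(9*w)
 * using 9*w = w8^w, 11*w = w8^w2^w, 13*w = w8^w4^w and 14*w = w8^w4^w2.
 */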
_GLOBAL(ppc_generate_decrypt_key)
addi r6,r5,24
slwi r6,r6,2
lwzx r7,r4,r6 /* first/last 4 words are same */
stw r7,0(r3)
lwz r7,0(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,4(r3)
lwz r7,4(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,8(r3)
lwz r7,8(r4)
stwx r7,r3,r6
addi r6,r6,4
lwzx r7,r4,r6
stw r7,12(r3)
lwz r7,12(r4)
stwx r7,r3,r6
addi r3,r3,16
add r4,r4,r6
subi r4,r4,28
addi r5,r5,20
srwi r5,r5,2
ppc_generate_decrypt_block:
li r6,4
mtctr r6
ppc_generate_decrypt_word:
lwz r6,0(r4)
GF8_MUL(r7, r6, r0, r7)
GF8_MUL(r8, r7, r0, r8)
GF8_MUL(r9, r8, r0, r9)
xor r10,r9,r6
xor r11,r7,r8
xor r11,r11,r9
xor r12,r7,r10
rotrwi r12,r12,24
xor r11,r11,r12
xor r12,r8,r10
rotrwi r12,r12,16
xor r11,r11,r12
rotrwi r12,r10,8
xor r11,r11,r12
stw r11,0(r3)
addi r3,r3,4
addi r4,r4,4
bdnz ppc_generate_decrypt_word
subi r4,r4,32
subi r5,r5,1
cmpwi r5,0
bt gt,ppc_generate_decrypt_block
blr


@@ -0,0 +1,630 @@
/*
* AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
#define LOAD_DATA(reg, off) \
lwz reg,off(rSP); /* load with offset */
#define SAVE_DATA(reg, off) \
stw reg,off(rDP); /* save with offset */
#define NEXT_BLOCK \
addi rSP,rSP,16; /* increment pointers per block */ \
addi rDP,rDP,16;
#define LOAD_IV(reg, off) \
lwz reg,off(rIP); /* IV loading with offset */
#define SAVE_IV(reg, off) \
stw reg,off(rIP); /* IV saving with offset */
#define START_IV /* nothing to reset */
#define CBC_DEC 16 /* CBC decrement per block */
#define CTR_DEC 1 /* CTR decrement one byte */
#else /* Macros for little endian */
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rSP; /* load reversed */ \
addi rSP,rSP,4; /* and increment pointer */
#define SAVE_DATA(reg, off) \
stwbrx reg,0,rDP; /* save reversed */ \
addi rDP,rDP,4; /* and increment pointer */
#define NEXT_BLOCK /* nothing to do */
#define LOAD_IV(reg, off) \
lwbrx reg,0,rIP; /* load reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define SAVE_IV(reg, off) \
stwbrx reg,0,rIP; /* save reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define START_IV \
subi rIP,rIP,16; /* must reset pointer */
#define CBC_DEC 32 /* 2 blocks because of incs */
#define CTR_DEC 17 /* 1 block because of incs */
#endif
#define SAVE_0_REGS
#define LOAD_0_REGS
#define SAVE_4_REGS \
stw rI0,96(r1); /* save 32 bit registers */ \
stw rI1,100(r1); \
stw rI2,104(r1); \
stw rI3,108(r1);
#define LOAD_4_REGS \
lwz rI0,96(r1); /* restore 32 bit registers */ \
lwz rI1,100(r1); \
lwz rI2,104(r1); \
lwz rI3,108(r1);
#define SAVE_8_REGS \
SAVE_4_REGS \
stw rG0,112(r1); /* save 32 bit registers */ \
stw rG1,116(r1); \
stw rG2,120(r1); \
stw rG3,124(r1);
#define LOAD_8_REGS \
LOAD_4_REGS \
lwz rG0,112(r1); /* restore 32 bit registers */ \
lwz rG1,116(r1); \
lwz rG2,120(r1); \
lwz rG3,124(r1);
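/*
 * The SAVE_/LOAD_##n##_REGS pairs are selected by token pasting in
 * INITIALIZE_CRYPT/FINALIZE_CRYPT below, so every entry point spills only
 * the 0, 4 or 8 extra GPRs it actually clobbers: ECB needs none, CBC/CTR
 * need the IV registers and XTS additionally the tweak registers.
 */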
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
mflr r0; \
stwu r1,-160(r1); /* create stack frame */ \
lis rT0,tab@h; /* en-/decryption table pointer */ \
stw r0,8(r1); /* save link register */ \
ori rT0,rT0,tab@l; \
evstdw r14,16(r1); \
mr rKS,rKP; \
evstdw r15,24(r1); /* We must save non volatile */ \
evstdw r16,32(r1); /* registers. Take the chance */ \
evstdw r17,40(r1); /* and save the SPE part too */ \
evstdw r18,48(r1); \
evstdw r19,56(r1); \
evstdw r20,64(r1); \
evstdw r21,72(r1); \
evstdw r22,80(r1); \
evstdw r23,88(r1); \
SAVE_##nr32bitregs##_REGS
#define FINALIZE_CRYPT(nr32bitregs) \
lwz r0,8(r1); \
evldw r14,16(r1); /* restore SPE registers */ \
evldw r15,24(r1); \
evldw r16,32(r1); \
evldw r17,40(r1); \
evldw r18,48(r1); \
evldw r19,56(r1); \
evldw r20,64(r1); \
evldw r21,72(r1); \
evldw r22,80(r1); \
evldw r23,88(r1); \
LOAD_##nr32bitregs##_REGS \
mtlr r0; /* restore link register */ \
xor r0,r0,r0; \
stw r0,16(r1); /* delete sensitive data */ \
stw r0,24(r1); /* that we might have pushed */ \
stw r0,32(r1); /* from another context that runs */ \
stw r0,40(r1); /* the same code */ \
stw r0,48(r1); \
stw r0,56(r1); \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
stw r0,88(r1); \
addi r1,r1,160; /* cleanup stack frame */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
rotrwi t1,s1,8; \
rlwimi t0,s0,8,8,15; \
rlwimi t1,s1,8,8,15; \
rlwimi t0,s0,8,24,31; \
rlwimi t1,s1,8,24,31;
#define GF128_MUL(d0, d1, d2, d3, t0) \
li t0,0x87; /* multiplication in GF128 */ \
cmpwi d3,-1; \
iselgt t0,0,t0; \
rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
rotlwi d3,d3,1; \
rlwimi d2,d1,0,0,0; \
rotlwi d2,d2,1; \
rlwimi d1,d0,0,0,0; \
slwi d0,d0,1; /* shift left 128 bit */ \
rotlwi d1,d1,1; \
xor d0,d0,t0;
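/*
 * GF128_MUL is the XTS tweak update, multiplying the 128 bit tweak by x
 * in GF(2^128). Rough C equivalent, reading d3 as the most significant
 * word of the shifted value:
 *   t0 = (d3 >> 31) ? 0x87 : 0;
 *   d3 = (d3 << 1) | (d2 >> 31);
 *   d2 = (d2 << 1) | (d1 >> 31);
 *   d1 = (d1 << 1) | (d0 >> 31);
 *   d0 = (d0 << 1) ^ t0;
 */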
#define START_KEY(d0, d1, d2, d3) \
lwz rW0,0(rKP); \
mtctr rRR; \
lwz rW1,4(rKP); \
lwz rW2,8(rKP); \
lwz rW3,12(rKP); \
xor rD0,d0,rW0; \
xor rD1,d1,rW1; \
xor rD2,d2,rW2; \
xor rD3,d3,rW3;
/*
* ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds)
*
* called from glue layer to encrypt a single 16 byte block
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
*/
_GLOBAL(ppc_encrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
LOAD_DATA(rD0, 0)
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
* ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds)
*
* called from glue layer to decrypt a single 16 byte block
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
*/
_GLOBAL(ppc_decrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
LOAD_DATA(rD0, 0)
addi rT1,rT0,4096
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
* ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes);
*
* called from glue layer to encrypt multiple blocks via ECB
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
*/
_GLOBAL(ppc_encrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
* ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes);
*
* called from glue layer to decrypt multiple blocks via ECB
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
*/
_GLOBAL(ppc_decrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
addi rT1,rT0,4096
ppc_decrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_decrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
* ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to encrypt multiple blocks via CBC
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
*/
_GLOBAL(ppc_encrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
cmpwi rLN,15
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
SAVE_DATA(rI0, 0)
xor rI1,rD1,rW1
SAVE_DATA(rI1, 4)
xor rI2,rD2,rW2
SAVE_DATA(rI2, 8)
xor rI3,rD3,rW3
SAVE_DATA(rI3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_cbc_loop
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to decrypt multiple blocks via CBC
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
*/
_GLOBAL(ppc_decrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
li rT1,15
LOAD_IV(rI0, 0)
andc rLN,rLN,rT1
LOAD_IV(rI1, 4)
subi rLN,rLN,16
LOAD_IV(rI2, 8)
add rSP,rSP,rLN /* reverse processing */
LOAD_IV(rI3, 12)
add rDP,rDP,rLN
LOAD_DATA(rD0, 0)
addi rT1,rT0,4096
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_IV
SAVE_IV(rD0, 0)
SAVE_IV(rD1, 4)
SAVE_IV(rD2, 8)
cmpwi rLN,16
SAVE_IV(rD3, 12)
bt lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
subi rLN,rLN,16
subi rSP,rSP,CBC_DEC
xor rW0,rD0,rW0
LOAD_DATA(rD0, 0)
xor rW1,rD1,rW1
LOAD_DATA(rD1, 4)
xor rW2,rD2,rW2
LOAD_DATA(rD2, 8)
xor rW3,rD3,rW3
LOAD_DATA(rD3, 12)
xor rW0,rW0,rD0
SAVE_DATA(rW0, 0)
xor rW1,rW1,rD1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rD2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rD3
SAVE_DATA(rW3, 12)
cmpwi rLN,15
subi rDP,rDP,CBC_DEC
bt gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rW0,rW0,rD0
xor rW1,rW1,rD1
xor rW2,rW2,rD2
xor rW3,rW3,rD3
xor rW0,rW0,rI0 /* decrypt with initial IV */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rI1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rI2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rI3
SAVE_DATA(rW3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to encrypt/decrypt multiple blocks
* via CTR. Number of bytes does not need to be a multiple of
* 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
*
*/
_GLOBAL(ppc_crypt_ctr)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rLN,16
LOAD_IV(rI3, 12)
START_IV
bt lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
mr rKP,rKS
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rW0,rD0,rW0
xor rW1,rD1,rW1
xor rW2,rD2,rW2
xor rW3,rD3,rW3
LOAD_DATA(rD0, 0)
subi rLN,rLN,16
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
addic rI3,rI3,1 /* increase counter */
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
NEXT_BLOCK
cmpwi rLN,15
bt gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
cmpwi rLN,0
bt eq,ppc_crypt_ctr_end
mr rKP,rKS
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rW0,rD0,rW0
SAVE_IV(rW0, 0)
xor rW1,rD1,rW1
SAVE_IV(rW1, 4)
xor rW2,rD2,rW2
SAVE_IV(rW2, 8)
xor rW3,rD3,rW3
SAVE_IV(rW3, 12)
mtctr rLN
subi rIP,rIP,CTR_DEC
subi rSP,rSP,1
subi rDP,rDP,1
ppc_crypt_ctr_xorbyte:
lbzu rW4,1(rIP) /* bytewise xor for partial block */
lbzu rW5,1(rSP)
xor rW4,rW4,rW5
stbu rW4,1(rDP)
bdnz ppc_crypt_ctr_xorbyte
subf rIP,rLN,rIP
addi rIP,rIP,1
addic rI3,rI3,1
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
ppc_crypt_ctr_end:
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
*
* called from glue layer to encrypt multiple blocks via XTS
* If key_twk is given, the initial IV encryption will be
* processed too. Round values are AES128 = 4, AES192 = 5,
* AES256 = 6
*
*/
_GLOBAL(ppc_encrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_encrypt_xts_notweak
mr rKP,rKT
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
ppc_encrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1)
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0)
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0
NEXT_BLOCK
bt gt,ppc_encrypt_xts_loop
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr
/*
* ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
*
* called from glue layer to decrypt multiple blocks via XTS
* If key_twk is given, the initial IV encryption will be
* processed too. Round values are AES128 = 4, AES192 = 5,
* AES256 = 6
*
*/
_GLOBAL(ppc_decrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
LOAD_IV(rI0, 0)
addi rT1,rT0,4096
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_decrypt_xts_notweak
subi rT0,rT0,4096
mr rKP,rKT
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
addi rT0,rT0,4096
ppc_decrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1)
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0)
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0
NEXT_BLOCK
bt gt,ppc_decrypt_xts_loop
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr


@@ -0,0 +1,42 @@
/*
* Common registers for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#define rKS r0 /* copy of en-/decryption key pointer */
#define rDP r3 /* destination pointer */
#define rSP r4 /* source pointer */
#define rKP r5 /* pointer to en-/decryption key */
#define rRR r6 /* en-/decryption rounds */
#define rLN r7 /* length of data to be processed */
#define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */
#define rKT r9 /* pointer to tweak key (XTS mode) */
#define rT0 r11 /* pointers to en-/decryption tables */
#define rT1 r10
#define rD0 r9 /* data */
#define rD1 r14
#define rD2 r12
#define rD3 r15
#define rW0 r16 /* working registers */
#define rW1 r17
#define rW2 r18
#define rW3 r19
#define rW4 r20
#define rW5 r21
#define rW6 r22
#define rW7 r23
#define rI0 r24 /* IV */
#define rI1 r25
#define rI2 r26
#define rI3 r27
#define rG0 r28 /* endian reversed tweak (XTS mode) */
#define rG1 r29
#define rG2 r30
#define rG3 r31


@@ -0,0 +1,331 @@
/*
* 4K AES tables for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
/*
* These big endian AES encryption/decryption tables have been taken from
* crypto/aes_generic.c and are designed to be simply accessed by a combination
* of rlwimi/lwz instructions with a minimum of table registers (usually only
* one required). Thus they are aligned to 4K. The locality of rotated values
* is derived from the reduced offsets that are available in the SPE load
* instructions. E.g. evldw, evlwwsplat, ...
*
* For the safety-conscious: because of their size these tables might be
* vulnerable to cache timing attacks. Nevertheless, in contrast to the
* generic tables, they have been reduced from 16KB to 8KB + 256 bytes.
* This is quite a good tradeoff for low power devices (e.g. routers)
* without dedicated encryption hardware where we usually have no
* multiuser environment.
*
*/
#define R(a, b, c, d) \
0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
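/*
 * R() stores every table word next to its three byte rotations, so one
 * 4KB table (256 entries * 4 words) replaces the four rotated copies
 * (T0..T3) of the generic code; the wanted rotation is then selected
 * purely via the load offset computed with rlwimi.
 */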
.data
.align 12
.globl PPC_AES_4K_ENCTAB
PPC_AES_4K_ENCTAB:
/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
.long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
.long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
.long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
.long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
.long R(60, 30, 30, 50), R(02, 01, 01, 03)
.long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
.long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
.long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
.long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
.long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
.long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
.long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
.long R(41, ad, ad, ec), R(b3, d4, d4, 67)
.long R(5f, a2, a2, fd), R(45, af, af, ea)
.long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
.long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
.long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
.long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
.long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
.long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
.long R(68, 34, 34, 5c), R(51, a5, a5, f4)
.long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
.long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
.long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
.long R(08, 04, 04, 0c), R(95, c7, c7, 52)
.long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
.long R(30, 18, 18, 28), R(37, 96, 96, a1)
.long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
.long R(0e, 07, 07, 09), R(24, 12, 12, 36)
.long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
.long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
.long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
.long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
.long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
.long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
.long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
.long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
.long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
.long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
.long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
.long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
.long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
.long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
.long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
.long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
.long R(67, be, be, d9), R(72, 39, 39, 4b)
.long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
.long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
.long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
.long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
.long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
.long R(66, 33, 33, 55), R(11, 85, 85, 94)
.long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
.long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
.long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
.long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
.long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
.long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
.long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
.long R(70, 38, 38, 48), R(f1, f5, f5, 04)
.long R(63, bc, bc, df), R(77, b6, b6, c1)
.long R(af, da, da, 75), R(42, 21, 21, 63)
.long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
.long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
.long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
.long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
.long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
.long R(88, 44, 44, cc), R(2e, 17, 17, 39)
.long R(93, c4, c4, 57), R(55, a7, a7, f2)
.long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
.long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
.long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
.long R(c0, 60, 60, a0), R(19, 81, 81, 98)
.long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
.long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
.long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
.long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
.long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
.long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
.long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
.long R(db, e0, e0, 3b), R(64, 32, 32, 56)
.long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
.long R(92, 49, 49, db), R(0c, 06, 06, 0a)
.long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
.long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
.long R(43, ac, ac, ef), R(c4, 62, 62, a6)
.long R(39, 91, 91, a8), R(31, 95, 95, a4)
.long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
.long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
.long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
.long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
.long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
.long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
.long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
.long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
.long R(47, ae, ae, e9), R(10, 08, 08, 18)
.long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
.long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
.long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
.long R(73, b4, b4, c7), R(97, c6, c6, 51)
.long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
.long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
.long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
.long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
.long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
.long R(71, b5, b5, c4), R(cc, 66, 66, aa)
.long R(90, 48, 48, d8), R(06, 03, 03, 05)
.long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
.long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
.long R(ae, 57, 57, f9), R(69, b9, b9, d0)
.long R(17, 86, 86, 91), R(99, c1, c1, 58)
.long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
.long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
.long R(2b, 98, 98, b3), R(22, 11, 11, 33)
.long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
.long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
.long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
.long R(15, 87, 87, 92), R(c9, e9, e9, 20)
.long R(87, ce, ce, 49), R(aa, 55, 55, ff)
.long R(50, 28, 28, 78), R(a5, df, df, 7a)
.long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
.long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
.long R(65, bf, bf, da), R(d7, e6, e6, 31)
.long R(84, 42, 42, c6), R(d0, 68, 68, b8)
.long R(82, 41, 41, c3), R(29, 99, 99, b0)
.long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
.long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
.long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
.globl PPC_AES_4K_DECTAB
PPC_AES_4K_DECTAB:
/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
.long R(51, f4, a7, 50), R(7e, 41, 65, 53)
.long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
.long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
.long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
.long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
.long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
.long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
.long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
.long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
.long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
.long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
.long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
.long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
.long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
.long R(d4, be, 83, 2d), R(58, 74, 21, d3)
.long R(49, e0, 69, 29), R(8e, c9, c8, 44)
.long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
.long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
.long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
.long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
.long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
.long R(97, 51, 33, 60), R(62, 53, 7f, 45)
.long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
.long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
.long R(70, 48, 68, 58), R(8f, 45, fd, 19)
.long R(94, de, 6c, 87), R(52, 7b, f8, b7)
.long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
.long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
.long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
.long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
.long R(30, 28, 87, f2), R(23, bf, a5, b2)
.long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
.long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
.long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
.long R(65, da, f4, cd), R(06, 05, be, d5)
.long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
.long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
.long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
.long R(0b, 83, ec, 39), R(40, 60, ef, aa)
.long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
.long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
.long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
.long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
.long R(04, 06, d4, 6f), R(60, 50, 15, ff)
.long R(19, 98, fb, 24), R(d6, bd, e9, 97)
.long R(89, 40, 43, cc), R(67, d9, 9e, 77)
.long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
.long R(e7, 19, 5b, 38), R(79, c8, ee, db)
.long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
.long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
.long R(09, 80, 86, 83), R(32, 2b, ed, 48)
.long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
.long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
.long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
.long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
.long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
.long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
.long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
.long R(80, c0, c5, 4f), R(61, dc, 20, a2)
.long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
.long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
.long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
.long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
.long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
.long R(57, f1, 19, 85), R(af, 75, 07, 4c)
.long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
.long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
.long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
.long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
.long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
.long R(d7, 31, dc, ca), R(42, 63, 85, 10)
.long R(13, 97, 22, 40), R(84, c6, 11, 20)
.long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
.long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
.long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
.long R(0d, 86, 52, ec), R(77, c1, e3, d0)
.long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
.long R(11, 94, 48, fa), R(47, e9, 64, 22)
.long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
.long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
.long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
.long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
.long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
.long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
.long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
.long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
.long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
.long R(2e, 39, f7, 5e), R(82, c3, af, f5)
.long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
.long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
.long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
.long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
.long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
.long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
.long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
.long R(21, bc, cf, 08), R(ef, 15, e8, e6)
.long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
.long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
.long R(31, a4, b2, af), R(2a, 3f, 23, 31)
.long R(c6, a5, 94, 30), R(35, a2, 66, c0)
.long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
.long R(e0, 90, d0, b0), R(33, a7, d8, 15)
.long R(f1, 04, 98, 4a), R(41, ec, da, f7)
.long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
.long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
.long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
.long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
.long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
.long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
.long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
.long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
.long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
.long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
.long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
.long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
.long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
.long R(9c, d2, df, 59), R(55, f2, 73, 3f)
.long R(18, 14, ce, 79), R(73, c7, 37, bf)
.long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
.long R(df, 3d, 6f, 14), R(78, 44, db, 86)
.long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
.long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
.long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
.long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
.long R(39, a8, 01, 71), R(08, 0c, b3, de)
.long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
.long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
.long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
.globl PPC_AES_4K_DECTAB2
PPC_AES_4K_DECTAB2:
/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d


@@ -0,0 +1,243 @@
/*
* Fast MD5 implementation for PPC
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#define rHP r3
#define rWP r4
#define rH0 r0
#define rH1 r6
#define rH2 r7
#define rH3 r5
#define rW00 r8
#define rW01 r9
#define rW02 r10
#define rW03 r11
#define rW04 r12
#define rW05 r14
#define rW06 r15
#define rW07 r16
#define rW08 r17
#define rW09 r18
#define rW10 r19
#define rW11 r20
#define rW12 r21
#define rW13 r22
#define rW14 r23
#define rW15 r24
#define rT0 r25
#define rT1 r26
#define INITIALIZE \
PPC_STLU r1,-INT_FRAME_SIZE(r1); \
SAVE_8GPRS(14, r1); /* push registers onto stack */ \
SAVE_4GPRS(22, r1); \
SAVE_GPR(26, r1)
#define FINALIZE \
REST_8GPRS(14, r1); /* pop registers from stack */ \
REST_4GPRS(22, r1); \
REST_GPR(26, r1); \
addi r1,r1,INT_FRAME_SIZE;
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rWP; /* load data */
#define INC_PTR \
addi rWP,rWP,4; /* increment per word */
#define NEXT_BLOCK /* nothing to do */
#else
#define LOAD_DATA(reg, off) \
lwz reg,off(rWP); /* load data */
#define INC_PTR /* nothing to do */
#define NEXT_BLOCK \
addi rWP,rWP,64; /* increment per block */
#endif
#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
LOAD_DATA(w0, off) /* W */ \
and rT0,b,c; /* 1: f = b and c */ \
INC_PTR /* ptr++ */ \
andc rT1,d,b; /* 1: f' = ~b and d */ \
LOAD_DATA(w1, off+4) /* W */ \
or rT0,rT0,rT1; /* 1: f = f or f' */ \
addi w0,w0,k0l; /* 1: wk = w + k */ \
add a,a,rT0; /* 1: a = a + f */ \
addis w0,w0,k0h; /* 1: wk = w + k' */ \
addis w1,w1,k1h; /* 2: wk = w + k */ \
add a,a,w0; /* 1: a = a + wk */ \
addi w1,w1,k1l; /* 2: wk = w + k' */ \
rotrwi a,a,p; /* 1: a = a rotl x */ \
add d,d,w1; /* 2: a = a + wk */ \
add a,a,b; /* 1: a = a + b */ \
and rT0,a,b; /* 2: f = b and c */ \
andc rT1,c,a; /* 2: f' = ~b and d */ \
or rT0,rT0,rT1; /* 2: f = f or f' */ \
add d,d,rT0; /* 2: a = a + f */ \
INC_PTR /* ptr++ */ \
rotrwi d,d,q; /* 2: a = a rotl x */ \
add d,d,a; /* 2: a = a + b */
#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
andc rT0,c,d; /* 1: f = c and ~d */ \
and rT1,b,d; /* 1: f' = b and d */ \
addi w0,w0,k0l; /* 1: wk = w + k */ \
or rT0,rT0,rT1; /* 1: f = f or f' */ \
addis w0,w0,k0h; /* 1: wk = w + k' */ \
add a,a,rT0; /* 1: a = a + f */ \
addi w1,w1,k1l; /* 2: wk = w + k */ \
add a,a,w0; /* 1: a = a + wk */ \
addis w1,w1,k1h; /* 2: wk = w + k' */ \
andc rT0,b,c; /* 2: f = c and ~d */ \
rotrwi a,a,p; /* 1: a = a rotl x */ \
add a,a,b; /* 1: a = a + b */ \
add d,d,w1; /* 2: a = a + wk */ \
and rT1,a,c; /* 2: f' = b and d */ \
or rT0,rT0,rT1; /* 2: f = f or f' */ \
add d,d,rT0; /* 2: a = a + f */ \
rotrwi d,d,q; /* 2: a = a rotl x */ \
add d,d,a; /* 2: a = a +b */
#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
xor rT0,b,c; /* 1: f' = b xor c */ \
addi w0,w0,k0l; /* 1: wk = w + k */ \
xor rT1,rT0,d; /* 1: f = f xor f' */ \
addis w0,w0,k0h; /* 1: wk = w + k' */ \
add a,a,rT1; /* 1: a = a + f */ \
addi w1,w1,k1l; /* 2: wk = w + k */ \
add a,a,w0; /* 1: a = a + wk */ \
addis w1,w1,k1h; /* 2: wk = w + k' */ \
rotrwi a,a,p; /* 1: a = a rotl x */ \
add d,d,w1; /* 2: a = a + wk */ \
add a,a,b; /* 1: a = a + b */ \
xor rT1,rT0,a; /* 2: f = b xor f' */ \
add d,d,rT1; /* 2: a = a + f */ \
rotrwi d,d,q; /* 2: a = a rotl x */ \
add d,d,a; /* 2: a = a + b */
#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
addi w0,w0,k0l; /* 1: w = w + k */ \
orc rT0,b,d; /* 1: f = b or ~d */ \
addis w0,w0,k0h; /* 1: w = w + k' */ \
xor rT0,rT0,c; /* 1: f = f xor c */ \
add a,a,w0; /* 1: a = a + wk */ \
addi w1,w1,k1l; /* 2: w = w + k */ \
add a,a,rT0; /* 1: a = a + f */ \
addis w1,w1,k1h; /* 2: w = w + k' */ \
rotrwi a,a,p; /* 1: a = a rotl x */ \
add a,a,b; /* 1: a = a + b */ \
orc rT0,a,c; /* 2: f = b or ~d */ \
add d,d,w1; /* 2: a = a + wk */ \
xor rT0,rT0,b; /* 2: f = f xor c */ \
add d,d,rT0; /* 2: a = a + f */ \
rotrwi d,d,q; /* 2: a = a rotl x */ \
add d,d,a; /* 2: a = a + b */
_GLOBAL(ppc_md5_transform)
INITIALIZE
mtctr r5
lwz rH0,0(rHP)
lwz rH1,4(rHP)
lwz rH2,8(rHP)
lwz rH3,12(rHP)
ppc_md5_main:
R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
0xd76b, -23432, 0xe8c8, -18602)
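/*
 * The constant pairs are the 32 bit MD5 K values split for addis/addi;
 * since addi sign extends, a negative low half is compensated in the high
 * half, e.g. (0xd76b << 16) + (-23432) = 0xd76b0000 - 0x5b88 = 0xd76aa478,
 * which is K[0] of the reference MD5 round constants.
 */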
R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
0x2420, 0x70db, 0xc1be, -12562)
R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
0xf57c, 0x0faf, 0x4788, -14806)
R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
0xa830, 0x4613, 0xfd47, -27391)
R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
0x6981, -26408, 0x8b45, -2129)
R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
0xffff, 0x5bb1, 0x895d, -10306)
R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
0x6b90, 0x1122, 0xfd98, 0x7193)
R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
0xa679, 0x438e, 0x49b4, 0x0821)
R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
0x0d56, 0x6e0c, 0x1810, 0x6d2d)
R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
0x9d02, -32109, 0x124c, 0x2332)
R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
0x8ea7, 0x4a33, 0x0245, -18270)
R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
0x8eee, -8608, 0xf258, -5095)
R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
0x969d, -10697, 0x1cbe, -15288)
R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
0x3317, 0x3e99, 0xdbd9, 0x7c15)
R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
0xac4b, 0x7772, 0xd8cf, 0x331d)
R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
0x6a28, 0x6dd8, 0x219a, 0x3b68)
R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
0x29cb, 0x28e5, 0x4218, -7788)
R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
0x473f, 0x06d1, 0x3aae, 0x3036)
R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
0xaea1, -15134, 0x640b, -11295)
R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
0x8f4c, 0x4887, 0xbc7c, -22499)
R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
0x7eb8, -27199, 0x00ea, 0x6050)
R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
0xe01a, 0x22fe, 0x4447, 0x69c5)
R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
0xb7f3, 0x0253, 0x59b1, 0x4d5b)
R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
0x4701, -27017, 0xc7bd, -19859)
R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
0x0988, -1462, 0x4c70, -19401)
R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
0xadaf, -5221, 0xfc99, 0x66f7)
R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
0x7e80, -16418, 0xba1e, -25587)
R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
0x4130, 0x380d, 0xe0c5, 0x738d)
lwz rW00,0(rHP)
R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
0xe837, -30770, 0xde8a, 0x69e8)
lwz rW14,4(rHP)
R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
0x9e79, 0x260f, 0x256d, -27941)
lwz rW12,8(rHP)
R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
0xab75, -20775, 0x4f9e, -28397)
lwz rW10,12(rHP)
R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
0x662b, 0x7c56, 0x11b2, 0x0358)
add rH0,rH0,rW00
stw rH0,0(rHP)
add rH1,rH1,rW14
stw rH1,4(rHP)
add rH2,rH2,rW12
stw rH2,8(rHP)
add rH3,rH3,rW10
stw rH3,12(rHP)
NEXT_BLOCK
bdnz ppc_md5_main
FINALIZE
blr


@@ -0,0 +1,165 @@
/*
* Glue code for MD5 implementation for PPC assembler
*
* Based on generic implementation.
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/md5.h>
#include <asm/byteorder.h>
extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
static inline void ppc_md5_clear_context(struct md5_state *sctx)
{
int count = sizeof(struct md5_state) >> 2;
u32 *ptr = (u32 *)sctx;
/* make sure we can clear the fast way */
BUILD_BUG_ON(sizeof(struct md5_state) % 4);
do { *ptr++ = 0; } while (--count);
}
static int ppc_md5_init(struct shash_desc *desc)
{
struct md5_state *sctx = shash_desc_ctx(desc);
sctx->hash[0] = 0x67452301;
sctx->hash[1] = 0xefcdab89;
sctx->hash[2] = 0x98badcfe;
sctx->hash[3] = 0x10325476;
sctx->byte_count = 0;
return 0;
}
static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct md5_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->byte_count & 0x3f;
unsigned int avail = 64 - offset;
const u8 *src = data;
sctx->byte_count += len;
if (avail > len) {
memcpy((char *)sctx->block + offset, src, len);
return 0;
}
if (offset) {
memcpy((char *)sctx->block + offset, src, avail);
ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
len -= avail;
src += avail;
}
if (len > 63) {
ppc_md5_transform(sctx->hash, src, len >> 6);
src += len & ~0x3f;
len &= 0x3f;
}
memcpy((char *)sctx->block, src, len);
return 0;
}
static int ppc_md5_final(struct shash_desc *desc, u8 *out)
{
struct md5_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->byte_count & 0x3f;
const u8 *src = (const u8 *)sctx->block;
u8 *p = (u8 *)src + offset;
int padlen = 55 - offset;
__le64 *pbits = (__le64 *)((char *)sctx->block + 56);
__le32 *dst = (__le32 *)out;
*p++ = 0x80;
if (padlen < 0) {
memset(p, 0x00, padlen + sizeof (u64));
ppc_md5_transform(sctx->hash, src, 1);
p = (char *)sctx->block;
padlen = 56;
}
memset(p, 0, padlen);
*pbits = cpu_to_le64(sctx->byte_count << 3);
ppc_md5_transform(sctx->hash, src, 1);
dst[0] = cpu_to_le32(sctx->hash[0]);
dst[1] = cpu_to_le32(sctx->hash[1]);
dst[2] = cpu_to_le32(sctx->hash[2]);
dst[3] = cpu_to_le32(sctx->hash[3]);
ppc_md5_clear_context(sctx);
return 0;
}
static int ppc_md5_export(struct shash_desc *desc, void *out)
{
struct md5_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int ppc_md5_import(struct shash_desc *desc, const void *in)
{
struct md5_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = ppc_md5_init,
.update = ppc_md5_update,
.final = ppc_md5_final,
.export = ppc_md5_export,
.import = ppc_md5_import,
.descsize = sizeof(struct md5_state),
.statesize = sizeof(struct md5_state),
.base = {
.cra_name = "md5",
.cra_driver_name= "md5-ppc",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init ppc_md5_mod_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit ppc_md5_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(ppc_md5_mod_init);
module_exit(ppc_md5_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
MODULE_ALIAS_CRYPTO("md5");
MODULE_ALIAS_CRYPTO("md5-ppc");


@ -0,0 +1,299 @@
/*
* Fast SHA-1 implementation for SPE instruction set (PPC)
*
* This code makes use of the SPE SIMD instruction set as defined in
* http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
* Implementation is based on optimization guide notes from
* http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#define rHP r3 /* pointer to hash value */
#define rWP r4 /* pointer to input */
#define rKP r5 /* pointer to constants */
#define rW0 r14 /* 64 bit round words */
#define rW1 r15
#define rW2 r16
#define rW3 r17
#define rW4 r18
#define rW5 r19
#define rW6 r20
#define rW7 r21
#define rH0 r6 /* 32 bit hash values */
#define rH1 r7
#define rH2 r8
#define rH3 r9
#define rH4 r10
#define rT0 r22 /* 64 bit temporary */
#define rT1 r0 /* 32 bit temporaries */
#define rT2 r11
#define rT3 r12
#define rK r23 /* 64 bit constant in volatile register */
#define LOAD_K01
#define LOAD_K11 \
evlwwsplat rK,0(rKP);
#define LOAD_K21 \
evlwwsplat rK,4(rKP);
#define LOAD_K31 \
evlwwsplat rK,8(rKP);
#define LOAD_K41 \
evlwwsplat rK,12(rKP);
#define INITIALIZE \
stwu r1,-128(r1); /* create stack frame */ \
evstdw r14,8(r1); /* We must save non volatile */ \
evstdw r15,16(r1); /* registers. Take the chance */ \
evstdw r16,24(r1); /* and save the SPE part too */ \
evstdw r17,32(r1); \
evstdw r18,40(r1); \
evstdw r19,48(r1); \
evstdw r20,56(r1); \
evstdw r21,64(r1); \
evstdw r22,72(r1); \
evstdw r23,80(r1);
#define FINALIZE \
evldw r14,8(r1); /* restore SPE registers */ \
evldw r15,16(r1); \
evldw r16,24(r1); \
evldw r17,32(r1); \
evldw r18,40(r1); \
evldw r19,48(r1); \
evldw r20,56(r1); \
evldw r21,64(r1); \
evldw r22,72(r1); \
evldw r23,80(r1); \
xor r0,r0,r0; \
stw r0,8(r1); /* Delete sensitive data */ \
stw r0,16(r1); /* that we might have pushed */ \
stw r0,24(r1); /* from another context that ran */ \
stw r0,32(r1); /* the same code. Assume that */ \
stw r0,40(r1); /* the lower part of the GPRs */ \
stw r0,48(r1); /* was already overwritten on */ \
stw r0,56(r1); /* the way down to here */ \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
addi r1,r1,128; /* cleanup stack frame */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
lwz reg,off(rWP); /* load data */
#define NEXT_BLOCK \
addi rWP,rWP,64; /* increment per block */
#else
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rWP; /* load data */ \
addi rWP,rWP,4; /* increment per word */
#define NEXT_BLOCK /* nothing to do */
#endif
#define R_00_15(a, b, c, d, e, w0, w1, k, off) \
LOAD_DATA(w0, off) /* 1: W */ \
and rT2,b,c; /* 1: F' = B and C */ \
LOAD_K##k##1 \
andc rT1,d,b; /* 1: F" = ~B and D */ \
rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \
or rT2,rT2,rT1; /* 1: F = F' or F" */ \
add e,e,rT0; /* 1: E = E + A' */ \
rotrwi b,b,2; /* 1: B = B rotl 30 */ \
add e,e,w0; /* 1: E = E + W */ \
LOAD_DATA(w1, off+4) /* 2: W */ \
add e,e,rT2; /* 1: E = E + F */ \
and rT1,a,b; /* 2: F' = B and C */ \
add e,e,rK; /* 1: E = E + K */ \
andc rT2,c,a; /* 2: F" = ~B and D */ \
add d,d,rK; /* 2: E = E + K */ \
or rT2,rT2,rT1; /* 2: F = F' or F" */ \
rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
add d,d,w1; /* 2: E = E + W */ \
rotrwi a,a,2; /* 2: B = B rotl 30 */ \
add d,d,rT0; /* 2: E = E + A' */ \
evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \
add d,d,rT2 /* 2: E = E + F */
#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
and rT2,b,c; /* 1: F' = B and C */ \
evmergelohi rT0,w7,w6; /* W[-3] */ \
andc rT1,d,b; /* 1: F" = ~B and D */ \
evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
or rT1,rT1,rT2; /* 1: F = F' or F" */ \
evxor w0,w0,w4; /* W = W xor W[-8] */ \
add e,e,rT1; /* 1: E = E + F */ \
evxor w0,w0,w1; /* W = W xor W[-14] */ \
rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
evrlwi w0,w0,1; /* W = W rotl 1 */ \
add e,e,rT2; /* 1: E = E + A' */ \
evaddw rT0,w0,rK; /* WK = W + K */ \
rotrwi b,b,2; /* 1: B = B rotl 30 */ \
LOAD_K##k##1 \
evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
add e,e,rT0; /* 1: E = E + WK */ \
add d,d,rT1; /* 2: E = E + WK */ \
and rT2,a,b; /* 2: F' = B and C */ \
andc rT1,c,a; /* 2: F" = ~B and D */ \
rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
or rT1,rT1,rT2; /* 2: F = F' or F" */ \
add d,d,rT0; /* 2: E = E + A' */ \
rotrwi a,a,2; /* 2: B = B rotl 30 */ \
add d,d,rT1 /* 2: E = E + F */
#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
evmergelohi rT0,w7,w6; /* W[-3] */ \
xor rT2,b,c; /* 1: F' = B xor C */ \
evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
xor rT2,rT2,d; /* 1: F = F' xor D */ \
evxor w0,w0,w4; /* W = W xor W[-8] */ \
add e,e,rT2; /* 1: E = E + F */ \
evxor w0,w0,w1; /* W = W xor W[-14] */ \
rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
evrlwi w0,w0,1; /* W = W rotl 1 */ \
add e,e,rT2; /* 1: E = E + A' */ \
evaddw rT0,w0,rK; /* WK = W + K */ \
rotrwi b,b,2; /* 1: B = B rotl 30 */ \
LOAD_K##k##1 \
evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
add e,e,rT0; /* 1: E = E + WK */ \
xor rT2,a,b; /* 2: F' = B xor C */ \
add d,d,rT1; /* 2: E = E + WK */ \
xor rT2,rT2,c; /* 2: F = F' xor D */ \
rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
add d,d,rT2; /* 2: E = E + F */ \
rotrwi a,a,2; /* 2: B = B rotl 30 */ \
add d,d,rT0 /* 2: E = E + A' */
#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
and rT2,b,c; /* 1: F' = B and C */ \
evmergelohi rT0,w7,w6; /* W[-3] */ \
or rT1,b,c; /* 1: F" = B or C */ \
evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
and rT1,d,rT1; /* 1: F" = F" and D */ \
evxor w0,w0,w4; /* W = W xor W[-8] */ \
or rT2,rT2,rT1; /* 1: F = F' or F" */ \
evxor w0,w0,w1; /* W = W xor W[-14] */ \
add e,e,rT2; /* 1: E = E + F */ \
evrlwi w0,w0,1; /* W = W rotl 1 */ \
rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
evaddw rT0,w0,rK; /* WK = W + K */ \
add e,e,rT2; /* 1: E = E + A' */ \
LOAD_K##k##1 \
evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
rotrwi b,b,2; /* 1: B = B rotl 30 */ \
add e,e,rT0; /* 1: E = E + WK */ \
and rT2,a,b; /* 2: F' = B and C */ \
or rT0,a,b; /* 2: F" = B or C */ \
add d,d,rT1; /* 2: E = E + WK */ \
and rT0,c,rT0; /* 2: F" = F" and D */ \
rotrwi a,a,2; /* 2: B = B rotl 30 */ \
or rT2,rT2,rT0; /* 2: F = F' or F" */ \
rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
add d,d,rT2; /* 2: E = E + F */ \
add d,d,rT0 /* 2: E = E + A' */
#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
_GLOBAL(ppc_spe_sha1_transform)
INITIALIZE
lwz rH0,0(rHP)
lwz rH1,4(rHP)
mtctr r5
lwz rH2,8(rHP)
lis rKP,PPC_SPE_SHA1_K@h
lwz rH3,12(rHP)
ori rKP,rKP,PPC_SPE_SHA1_K@l
lwz rH4,16(rHP)
ppc_spe_sha1_main:
R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
lwz rT3,0(rHP)
R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
lwz rW1,4(rHP)
R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
lwz rW2,8(rHP)
R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
lwz rW3,12(rHP)
NEXT_BLOCK
lwz rW4,16(rHP)
add rH0,rH0,rT3
stw rH0,0(rHP)
add rH1,rH1,rW1
stw rH1,4(rHP)
add rH2,rH2,rW2
stw rH2,8(rHP)
add rH3,rH3,rW3
stw rH3,12(rHP)
add rH4,rH4,rW4
stw rH4,16(rHP)
bdnz ppc_spe_sha1_main
FINALIZE
blr
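/*
 * The four SHA-1 round constants, one per group of 20 rounds; the LOAD_K*1
 * macros use evlwwsplat to duplicate each 32-bit constant into both halves
 * of the 64-bit SPE register rK so the two interleaved rounds can share it.
 */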
.data
.align 4
PPC_SPE_SHA1_K:
.long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6


@ -0,0 +1,210 @@
/*
* Glue code for SHA-1 implementation for SPE instructions (PPC)
*
* Based on generic implementation.
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <linux/hardirq.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
* between preempt_disable() and preempt_enable(). SHA1 takes ~1000
* operations per 64 bytes. e500 cores can issue two arithmetic instructions
* per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
* Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
* Headroom for cache misses included. Even with the low end model clocked
* at 667 MHz this amounts to a critical time window of less than 27us.
*
*/
#define MAX_BYTES 2048
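/*
 * Rough check of the bound above, using the estimates from the comment
 * (treat all numbers as approximations):
 *
 *   2048 bytes / 64 bytes per block   = 32 blocks
 *   32 blocks * ~1000 operations      = ~32,000 operations
 *   ~32,000 ops / 2 issued per cycle  = ~16,000 cycles, ~18,000 with headroom
 *   ~18,000 cycles / 667 MHz          = ~27 us of disabled preemption
 */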
extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
static void spe_begin(void)
{
/* We just start SPE operations and will save SPE registers later. */
preempt_disable();
enable_kernel_spe();
}
static void spe_end(void)
{
/* reenable preemption */
preempt_enable();
}
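/*
 * Design note: enable_kernel_spe() gives the kernel temporary use of the
 * SPE unit, so the code must not be preempted while it is active. The
 * assembler routine saves and restores the non-volatile SPE registers
 * itself, and MAX_BYTES bounds each preempt-disabled window.
 */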
static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
{
int count = sizeof(struct sha1_state) >> 2;
u32 *ptr = (u32 *)sctx;
/* make sure we can clear the fast way */
BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
do { *ptr++ = 0; } while (--count);
}
static int ppc_spe_sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA1_H0;
sctx->state[1] = SHA1_H1;
sctx->state[2] = SHA1_H2;
sctx->state[3] = SHA1_H3;
sctx->state[4] = SHA1_H4;
sctx->count = 0;
return 0;
}
static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->count & 0x3f;
const unsigned int avail = 64 - offset;
unsigned int bytes;
const u8 *src = data;
if (avail > len) {
sctx->count += len;
memcpy((char *)sctx->buffer + offset, src, len);
return 0;
}
sctx->count += len;
if (offset) {
memcpy((char *)sctx->buffer + offset, src, avail);
spe_begin();
ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
spe_end();
len -= avail;
src += avail;
}
while (len > 63) {
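/* cut input data into smaller blocks */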
bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
bytes = bytes & ~0x3f;
spe_begin();
ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
spe_end();
src += bytes;
len -= bytes;
}
memcpy((char *)sctx->buffer, src, len);
return 0;
}
static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->count & 0x3f;
char *p = (char *)sctx->buffer + offset;
int padlen;
__be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
__be32 *dst = (__be32 *)out;
padlen = 55 - offset;
*p++ = 0x80;
spe_begin();
if (padlen < 0) {
memset(p, 0x00, padlen + sizeof(u64));
ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
p = (char *)sctx->buffer;
padlen = 56;
}
memset(p, 0, padlen);
*pbits = cpu_to_be64(sctx->count << 3);
ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
spe_end();
dst[0] = cpu_to_be32(sctx->state[0]);
dst[1] = cpu_to_be32(sctx->state[1]);
dst[2] = cpu_to_be32(sctx->state[2]);
dst[3] = cpu_to_be32(sctx->state[3]);
dst[4] = cpu_to_be32(sctx->state[4]);
ppc_sha1_clear_context(sctx);
return 0;
}
static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = ppc_spe_sha1_init,
.update = ppc_spe_sha1_update,
.final = ppc_spe_sha1_final,
.export = ppc_spe_sha1_export,
.import = ppc_spe_sha1_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init ppc_spe_sha1_mod_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit ppc_spe_sha1_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(ppc_spe_sha1_mod_init);
module_exit(ppc_spe_sha1_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
MODULE_ALIAS_CRYPTO("sha1");
MODULE_ALIAS_CRYPTO("sha1-ppc-spe");


@ -0,0 +1,323 @@
/*
* Fast SHA-256 implementation for SPE instruction set (PPC)
*
* This code makes use of the SPE SIMD instruction set as defined in
* http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
* Implementation is based on optimization guide notes from
* http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#define rHP r3 /* pointer to hash values in memory */
#define rKP r24 /* pointer to round constants */
#define rWP r4 /* pointer to input data */
#define rH0 r5 /* 8 32 bit hash values in 8 registers */
#define rH1 r6
#define rH2 r7
#define rH3 r8
#define rH4 r9
#define rH5 r10
#define rH6 r11
#define rH7 r12
#define rW0 r14 /* 64 bit registers. 16 words in 8 registers */
#define rW1 r15
#define rW2 r16
#define rW3 r17
#define rW4 r18
#define rW5 r19
#define rW6 r20
#define rW7 r21
#define rT0 r22 /* 64 bit temporaries */
#define rT1 r23
#define rT2 r0 /* 32 bit temporaries */
#define rT3 r25
#define CMP_KN_LOOP
#define CMP_KC_LOOP \
cmpwi rT1,0;
#define INITIALIZE \
stwu r1,-128(r1); /* create stack frame */ \
evstdw r14,8(r1); /* We must save non volatile */ \
evstdw r15,16(r1); /* registers. Take the chance */ \
evstdw r16,24(r1); /* and save the SPE part too */ \
evstdw r17,32(r1); \
evstdw r18,40(r1); \
evstdw r19,48(r1); \
evstdw r20,56(r1); \
evstdw r21,64(r1); \
evstdw r22,72(r1); \
evstdw r23,80(r1); \
stw r24,88(r1); /* save normal registers */ \
stw r25,92(r1);
#define FINALIZE \
evldw r14,8(r1); /* restore SPE registers */ \
evldw r15,16(r1); \
evldw r16,24(r1); \
evldw r17,32(r1); \
evldw r18,40(r1); \
evldw r19,48(r1); \
evldw r20,56(r1); \
evldw r21,64(r1); \
evldw r22,72(r1); \
evldw r23,80(r1); \
lwz r24,88(r1); /* restore normal registers */ \
lwz r25,92(r1); \
xor r0,r0,r0; \
stw r0,8(r1); /* Delete sensitive data */ \
stw r0,16(r1); /* that we might have pushed */ \
stw r0,24(r1); /* from another context that ran */ \
stw r0,32(r1); /* the same code. Assume that */ \
stw r0,40(r1); /* the lower part of the GPRs */ \
stw r0,48(r1); /* was already overwritten on */ \
stw r0,56(r1); /* the way down to here */ \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
addi r1,r1,128; /* cleanup stack frame */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
lwz reg,off(rWP); /* load data */
#define NEXT_BLOCK \
addi rWP,rWP,64; /* increment per block */
#else
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rWP; /* load data */ \
addi rWP,rWP,4; /* increment per word */
#define NEXT_BLOCK /* nothing to do */
#endif
#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
LOAD_DATA(w, off) /* 1: W */ \
rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \
rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \
xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \
and rT3,e,f; /* 1: ch = e and f */ \
xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \
andc rT1,g,e; /* 1: ch' = ~e and g */ \
lwz rT2,off(rKP); /* 1: K */ \
xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \
add h,h,rT0; /* 1: temp1 = h + S1 */ \
add rT3,rT3,w; /* 1: temp1' = ch + w */ \
rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \
rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \
add h,h,rT2; /* 1: temp1 = temp1 + K */ \
rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \
add d,d,h; /* 1: d = d + temp1 */ \
xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \
evmergelo w,w,w; /* shift W */ \
or rT2,a,b; /* 1: maj = a or b */ \
and rT1,a,b; /* 1: maj' = a and b */ \
and rT2,rT2,c; /* 1: maj = maj and c */ \
LOAD_DATA(w, off+4) /* 2: W */ \
or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \
rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
add h,h,rT3; /* 1: h = temp1 + temp2 */ \
rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
and rT3,d,e; /* 2: ch = e and f */ \
xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
andc rT1,f,d; /* 2: ch' = ~e and g */ \
lwz rT2,off+4(rKP); /* 2: K */ \
xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
add g,g,rT0; /* 2: temp1 = h + S1 */ \
add rT3,rT3,w; /* 2: temp1' = ch + w */ \
rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \
rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
add g,g,rT2; /* 2: temp1 = temp1 + K */ \
rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
or rT2,h,a; /* 2: maj = a or b */ \
xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
and rT1,h,a; /* 2: maj' = a and b */ \
and rT2,rT2,b; /* 2: maj = maj and c */ \
add c,c,g; /* 2: d = d + temp1 */ \
or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
add g,g,rT3 /* 2: h = temp1 + temp2 */
#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
evmergelohi rT0,w0,w1; /* w[-15] */ \
rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \
evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \
xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \
rotrwi rT3,e,25; /* 1: S1' = e rotr 25 */ \
evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \
xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
evrlwi rT0,rT0,21; /* s0' = w[-15] rotr 18 */ \
add h,h,rT2; /* 1: temp1 = h + S1 */ \
evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \
and rT2,e,f; /* 1: ch = e and f */ \
evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \
andc rT3,g,e; /* 1: ch' = ~e and g */ \
evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \
xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \
evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \
add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
evrlwi rT1,w7,13; /* s1' = w[-2] rotr 19 */ \
rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \
evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
evldw rT1,off(rKP); /* k */ \
rotrwi rT3,a,22; /* 1: S0' = a rotr 22 */ \
evaddw w0,w0,rT0; /* w = w + s1 */ \
xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
evmergelohi rT0,w4,w5; /* w[-7] */ \
and rT3,a,b; /* 1: maj = a and b */ \
evaddw w0,w0,rT0; /* w = w + w[-7] */ \
CMP_K##k##_LOOP \
add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
evaddw rT1,rT1,w0; /* wk = w + k */ \
xor rT3,a,b; /* 1: maj = a xor b */ \
evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \
and rT3,rT3,c; /* 1: maj = maj and c */ \
add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \
add g,g,rT1; /* 2: temp1 = temp1 + wk */ \
add d,d,h; /* 1: d = d + temp1 */ \
rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
add h,h,rT2; /* 1: h = temp1 + temp2 */ \
rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
and rT3,d,e; /* 2: ch = e and f */ \
xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
andc rT1,f,d; /* 2: ch' = ~e and g */ \
add g,g,rT0; /* 2: temp1 = h + S1 */ \
xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
or rT2,h,a; /* 2: maj = a or b */ \
and rT1,h,a; /* 2: maj' = a and b */ \
and rT2,rT2,b; /* 2: maj = maj and c */ \
xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
add c,c,g; /* 2: d = d + temp1 */ \
add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
add g,g,rT3 /* 2: h = temp1 + temp2 */
_GLOBAL(ppc_spe_sha256_transform)
INITIALIZE
mtctr r5
lwz rH0,0(rHP)
lwz rH1,4(rHP)
lwz rH2,8(rHP)
lwz rH3,12(rHP)
lwz rH4,16(rHP)
lwz rH5,20(rHP)
lwz rH6,24(rHP)
lwz rH7,28(rHP)
ppc_spe_sha256_main:
lis rKP,PPC_SPE_SHA256_K@ha
addi rKP,rKP,PPC_SPE_SHA256_K@l
R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
ppc_spe_sha256_16_rounds:
addi rKP,rKP,64
R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
rW0, rW1, rW4, rW5, rW7, N, 0)
R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
rW1, rW2, rW5, rW6, rW0, N, 8)
R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
rW2, rW3, rW6, rW7, rW1, N, 16)
R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
rW3, rW4, rW7, rW0, rW2, N, 24)
R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
rW4, rW5, rW0, rW1, rW3, N, 32)
R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
rW5, rW6, rW1, rW2, rW4, N, 40)
R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
rW6, rW7, rW2, rW3, rW5, N, 48)
R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
rW7, rW0, rW3, rW4, rW6, C, 56)
bt gt,ppc_spe_sha256_16_rounds
lwz rW0,0(rHP)
NEXT_BLOCK
lwz rW1,4(rHP)
lwz rW2,8(rHP)
lwz rW3,12(rHP)
lwz rW4,16(rHP)
lwz rW5,20(rHP)
lwz rW6,24(rHP)
lwz rW7,28(rHP)
add rH0,rH0,rW0
stw rH0,0(rHP)
add rH1,rH1,rW1
stw rH1,4(rHP)
add rH2,rH2,rW2
stw rH2,8(rHP)
add rH3,rH3,rW3
stw rH3,12(rHP)
add rH4,rH4,rW4
stw rH4,16(rHP)
add rH5,rH5,rW5
stw rH5,20(rHP)
add rH6,rH6,rW6
stw rH6,24(rHP)
add rH7,rH7,rW7
stw rH7,28(rHP)
bdnz ppc_spe_sha256_main
FINALIZE
blr
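/*
 * The 64 SHA-256 round constants K[0..63]. The first 16 rounds read single
 * words with lwz in R_LOAD_W; the 16-round main loop advances rKP by 64
 * bytes per pass and fetches constant pairs with evldw in R_CALC_W.
 */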
.data
.align 5
PPC_SPE_SHA256_K:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2


@ -0,0 +1,275 @@
/*
* Glue code for SHA-256 implementation for SPE instructions (PPC)
*
* Based on generic implementation. The assembler module takes care
* of the SPE registers so it can run from interrupt context.
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <linux/hardirq.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
* between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
* operations per 64 bytes. e500 cores can issue two arithmetic instructions
* per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
* Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
* Headroom for cache misses included. Even with the low end model clocked
* at 667 MHz this amounts to a critical time window of less than 27us.
*
*/
#define MAX_BYTES 1024
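/*
 * The same estimate as for SHA-1, with SHA-256's doubled per-block cost
 * (numbers from the comment above, all approximate):
 *
 *   1024 bytes / 64 bytes per block   = 16 blocks
 *   16 blocks * ~2000 operations      = ~32,000 operations
 *   ~32,000 ops / 2 issued per cycle  = ~16,000 cycles, ~18,000 with headroom
 *   ~18,000 cycles / 667 MHz          = ~27 us of disabled preemption
 */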
extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
static void spe_begin(void)
{
/* We just start SPE operations and will save SPE registers later. */
preempt_disable();
enable_kernel_spe();
}
static void spe_end(void)
{
/* reenable preemption */
preempt_enable();
}
static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
{
int count = sizeof(struct sha256_state) >> 2;
u32 *ptr = (u32 *)sctx;
/* make sure we can clear the fast way */
BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
do { *ptr++ = 0; } while (--count);
}
static int ppc_spe_sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
static int ppc_spe_sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->count & 0x3f;
const unsigned int avail = 64 - offset;
unsigned int bytes;
const u8 *src = data;
if (avail > len) {
sctx->count += len;
memcpy((char *)sctx->buf + offset, src, len);
return 0;
}
sctx->count += len;
if (offset) {
memcpy((char *)sctx->buf + offset, src, avail);
spe_begin();
ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
spe_end();
len -= avail;
src += avail;
}
while (len > 63) {
/* cut input data into smaller blocks */
bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
bytes = bytes & ~0x3f;
spe_begin();
ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
spe_end();
src += bytes;
len -= bytes;
}
memcpy((char *)sctx->buf, src, len);
return 0;
}
static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
const unsigned int offset = sctx->count & 0x3f;
char *p = (char *)sctx->buf + offset;
int padlen;
__be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
__be32 *dst = (__be32 *)out;
padlen = 55 - offset;
*p++ = 0x80;
spe_begin();
if (padlen < 0) {
memset(p, 0x00, padlen + sizeof(u64));
ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
p = (char *)sctx->buf;
padlen = 56;
}
memset(p, 0, padlen);
*pbits = cpu_to_be64(sctx->count << 3);
ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
spe_end();
dst[0] = cpu_to_be32(sctx->state[0]);
dst[1] = cpu_to_be32(sctx->state[1]);
dst[2] = cpu_to_be32(sctx->state[2]);
dst[3] = cpu_to_be32(sctx->state[3]);
dst[4] = cpu_to_be32(sctx->state[4]);
dst[5] = cpu_to_be32(sctx->state[5]);
dst[6] = cpu_to_be32(sctx->state[6]);
dst[7] = cpu_to_be32(sctx->state[7]);
ppc_sha256_clear_context(sctx);
return 0;
}
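/*
 * SHA-224 uses the SHA-256 compression function with different initial
 * values, so the SHA-256 final path is reused and the digest is truncated
 * to the first seven words (28 bytes).
 */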
static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
{
u32 D[SHA256_DIGEST_SIZE >> 2];
__be32 *dst = (__be32 *)out;
ppc_spe_sha256_final(desc, (u8 *)D);
/* avoid bytewise memcpy */
dst[0] = D[0];
dst[1] = D[1];
dst[2] = D[2];
dst[3] = D[3];
dst[4] = D[4];
dst[5] = D[5];
dst[6] = D[6];
/* clear sensitive data */
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg algs[2] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = ppc_spe_sha256_init,
.update = ppc_spe_sha256_update,
.final = ppc_spe_sha256_final,
.export = ppc_spe_sha256_export,
.import = ppc_spe_sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name= "sha256-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = ppc_spe_sha224_init,
.update = ppc_spe_sha256_update,
.final = ppc_spe_sha224_final,
.export = ppc_spe_sha256_export,
.import = ppc_spe_sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name= "sha224-ppc-spe",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init ppc_spe_sha256_mod_init(void)
{
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit ppc_spe_sha256_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(ppc_spe_sha256_mod_init);
module_exit(ppc_spe_sha256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
MODULE_ALIAS_CRYPTO("sha224");
MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
MODULE_ALIAS_CRYPTO("sha256");
MODULE_ALIAS_CRYPTO("sha256-ppc-spe");


@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
struct crypto_aead *cryptd_child;
struct aesni_rfc4106_gcm_ctx *child_ctx;
cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
@ -890,15 +892,12 @@ out_free_ablkcipher:
return ret;
}
static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
unsigned int key_len)
static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
{
int ret = 0;
struct crypto_tfm *tfm = crypto_aead_tfm(parent);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
struct aesni_rfc4106_gcm_ctx *child_ctx =
aesni_rfc4106_gcm_ctx_get(cryptd_child);
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
u8 *new_key_align, *new_key_mem = NULL;
if (key_len < 4) {
@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
goto exit;
}
ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
memcpy(child_ctx, ctx, sizeof(*ctx));
exit:
kfree(new_key_mem);
return ret;
}
/* This is the Integrity Check Value (aka the authentication tag) length and
* can be 8, 12 or 16 bytes long. */
static int rfc4106_set_authsize(struct crypto_aead *parent,
unsigned int authsize)
static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
unsigned int key_len)
{
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
int ret;
ret = crypto_aead_setkey(child, key, key_len);
if (!ret) {
memcpy(ctx, c_ctx, sizeof(*ctx));
ctx->cryptd_tfm = cryptd_tfm;
}
return ret;
}
static int common_rfc4106_set_authsize(struct crypto_aead *aead,
unsigned int authsize)
{
switch (authsize) {
case 8:
case 12:
@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
default:
return -EINVAL;
}
crypto_aead_crt(parent)->authsize = authsize;
crypto_aead_crt(cryptd_child)->authsize = authsize;
crypto_aead_crt(aead)->authsize = authsize;
return 0;
}
static int rfc4106_encrypt(struct aead_request *req)
/* This is the Integrity Check Value (aka the authentication tag) length and
* can be 8, 12 or 16 bytes long. */
static int rfc4106_set_authsize(struct crypto_aead *parent,
unsigned int authsize)
{
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_aead_encrypt(cryptd_req);
} else {
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
kernel_fpu_begin();
ret = cryptd_child->base.crt_aead.encrypt(req);
kernel_fpu_end();
return ret;
}
}
static int rfc4106_decrypt(struct aead_request *req)
{
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_aead_decrypt(cryptd_req);
} else {
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
kernel_fpu_begin();
ret = cryptd_child->base.crt_aead.decrypt(req);
kernel_fpu_end();
return ret;
}
ret = crypto_aead_setauthsize(child, authsize);
if (!ret)
crypto_aead_crt(parent)->authsize = authsize;
return ret;
}
static int __driver_rfc4106_encrypt(struct aead_request *req)
@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
}
return retval;
}
static int rfc4106_encrypt(struct aead_request *req)
{
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
ret = crypto_aead_encrypt(cryptd_req);
} else {
kernel_fpu_begin();
ret = __driver_rfc4106_encrypt(req);
kernel_fpu_end();
}
return ret;
}
static int rfc4106_decrypt(struct aead_request *req)
{
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
ret = crypto_aead_decrypt(cryptd_req);
} else {
kernel_fpu_begin();
ret = __driver_rfc4106_decrypt(req);
kernel_fpu_end();
}
return ret;
}
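/*
 * The helpers below back the internal "__gcm-aes-aesni" algorithm. They
 * assume the FPU is already usable in the calling context; the WARN_ONCE
 * catches misuse, since the internal algorithm is not meant to be invoked
 * from arbitrary contexts.
 */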
static int helper_rfc4106_encrypt(struct aead_request *req)
{
int ret;
if (unlikely(!irq_fpu_usable())) {
WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
ret = -EINVAL;
} else {
kernel_fpu_begin();
ret = __driver_rfc4106_encrypt(req);
kernel_fpu_end();
}
return ret;
}
static int helper_rfc4106_decrypt(struct aead_request *req)
{
int ret;
if (unlikely(!irq_fpu_usable())) {
WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
ret = -EINVAL;
} else {
kernel_fpu_begin();
ret = __driver_rfc4106_decrypt(req);
kernel_fpu_end();
}
return ret;
}
#endif
static struct crypto_alg aesni_algs[] = { {
@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__aes-aesni",
.cra_driver_name = "__driver-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__ecb-aes-aesni",
.cra_driver_name = "__driver-ecb-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__cbc-aes-aesni",
.cra_driver_name = "__driver-cbc-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__ctr-aes-aesni",
.cra_driver_name = "__driver-ctr-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__gcm-aes-aesni",
.cra_driver_name = "__driver-gcm-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
AESNI_ALIGN,
@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
.cra_module = THIS_MODULE,
.cra_u = {
.aead = {
.encrypt = __driver_rfc4106_encrypt,
.decrypt = __driver_rfc4106_decrypt,
.setkey = common_rfc4106_set_key,
.setauthsize = common_rfc4106_set_authsize,
.encrypt = helper_rfc4106_encrypt,
.decrypt = helper_rfc4106_decrypt,
.ivsize = 8,
.maxauthsize = 16,
},
},
}, {
@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__lrw-aes-aesni",
.cra_driver_name = "__driver-lrw-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_lrw_ctx),
.cra_alignmask = 0,
@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__xts-aes-aesni",
.cra_driver_name = "__driver-xts-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_xts_ctx),
.cra_alignmask = 0,


@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni-avx2",
.cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__cbc-camellia-aesni-avx2",
.cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ctr-camellia-aesni-avx2",
.cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__lrw-camellia-aesni-avx2",
.cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__xts-camellia-aesni-avx2",
.cra_driver_name = "__driver-xts-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,


@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni",
.cra_driver_name = "__driver-ecb-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__cbc-camellia-aesni",
.cra_driver_name = "__driver-cbc-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ctr-camellia-aesni",
.cra_driver_name = "__driver-ctr-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__lrw-camellia-aesni",
.cra_driver_name = "__driver-lrw-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__xts-camellia-aesni",
.cra_driver_name = "__driver-xts-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,


@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__ecb-cast5-avx",
.cra_driver_name = "__driver-ecb-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__cbc-cast5-avx",
.cra_driver_name = "__driver-cbc-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__ctr-cast5-avx",
.cra_driver_name = "__driver-ctr-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,


@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__ecb-cast6-avx",
.cra_driver_name = "__driver-ecb-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__cbc-cast6-avx",
.cra_driver_name = "__driver-cbc-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__ctr-cast6-avx",
.cra_driver_name = "__driver-ctr-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__lrw-cast6-avx",
.cra_driver_name = "__driver-lrw-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_lrw_ctx),
.cra_alignmask = 0,
@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__xts-cast6-avx",
.cra_driver_name = "__driver-xts-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_xts_ctx),
.cra_alignmask = 0,


@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
.cra_name = "__ghash",
.cra_driver_name = "__ghash-pclmulqdqni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_flags = CRYPTO_ALG_TYPE_SHASH |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;


@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
le128_to_be128((be128 *)walk->iv, &ctrblk);
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,


@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__ecb-serpent-avx2",
.cra_driver_name = "__driver-ecb-serpent-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__cbc-serpent-avx2",
.cra_driver_name = "__driver-cbc-serpent-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__ctr-serpent-avx2",
.cra_driver_name = "__driver-ctr-serpent-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__lrw-serpent-avx2",
.cra_driver_name = "__driver-lrw-serpent-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__xts-serpent-avx2",
.cra_driver_name = "__driver-xts-serpent-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,


@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-avx",
.cra_driver_name = "__driver-ecb-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__cbc-serpent-avx",
.cra_driver_name = "__driver-cbc-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ctr-serpent-avx",
.cra_driver_name = "__driver-ctr-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__lrw-serpent-avx",
.cra_driver_name = "__driver-lrw-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__xts-serpent-avx",
.cra_driver_name = "__driver-xts-serpent-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,


@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-sse2",
.cra_driver_name = "__driver-ecb-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__cbc-serpent-sse2",
.cra_driver_name = "__driver-cbc-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ctr-serpent-sse2",
.cra_driver_name = "__driver-ctr-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__lrw-serpent-sse2",
.cra_driver_name = "__driver-lrw-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__xts-serpent-sse2",
.cra_driver_name = "__driver-xts-serpent-sse2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,


@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = {
* use ASYNC flag as some buffers in multi-buffer
* algo may not have completed before the hashing thread sleeps
*/
.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC,
.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
struct mcryptd_hash_ctx *mctx;
mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0);
mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(mcryptd_tfm))
return PTR_ERR(mcryptd_tfm);
mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
while (!list_empty(&cstate->work_list)) {
rctx = list_entry(cstate->work_list.next,
struct mcryptd_hash_request_ctx, waiter);
if time_before(cur_time, rctx->tag.expire)
if (time_before(cur_time, rctx->tag.expire))
break;
kernel_fpu_begin();
sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);


@ -56,7 +56,7 @@
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
{
unsigned int j;
state->unused_lanes = 0xF76543210;
state->unused_lanes = 0xF76543210ULL;
for (j = 0; j < 8; j++) {
state->lens[j] = 0xFFFFFFFF;
state->ldata[j].job_in_lane = NULL;


@ -28,7 +28,7 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <crypto/sha1_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
unsigned int rounds);
unsigned int rounds);
#endif
static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
static int sha1_ssse3_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_transform_asm(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_transform_asm(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
if (!irq_fpu_usable() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return crypto_sha1_update(desc, data, len);
return 0;
}
/* make sure casting to sha1_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
if (!irq_fpu_usable()) {
res = crypto_sha1_update(desc, data, len);
} else {
kernel_fpu_begin();
res = __sha1_ssse3_update(desc, data, len, partial);
kernel_fpu_end();
}
kernel_fpu_begin();
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_transform_asm);
kernel_fpu_end();
return res;
return 0;
}
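/*
 * Sketch of the assumption the BUILD_BUG_ON above guards: the shared
 * base layer calls the block function with a struct sha1_state *,
 * while the assembler routines take the bare u32 digest array, so
 * the (sha1_block_fn *) cast is only sound while state[] stays the
 * first member. Roughly (types as assumed here):
 *
 *	typedef void (sha1_block_fn)(struct sha1_state *sst,
 *				     u8 const *src, int blocks);
 *	struct sha1_state { u32 state[5]; u64 count; u8 buffer[64]; };
 */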
static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!irq_fpu_usable())
return crypto_sha1_finup(desc, data, len, out);
kernel_fpu_begin();
if (len)
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_transform_asm);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
kernel_fpu_end();
return sha1_base_finish(desc, out);
}
/* Add padding and return the message digest. */
static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
if (!irq_fpu_usable()) {
crypto_sha1_update(desc, padding, padlen);
crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_fpu_begin();
/* We need to fill a whole block for __sha1_ssse3_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_ssse3_update(desc, padding, padlen, index);
}
__sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
kernel_fpu_end();
}
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha1_ssse3_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
return sha1_ssse3_finup(desc, NULL, 0, out);
}
#ifdef CONFIG_AS_AVX2
@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data,
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_ssse3_init,
.init = sha1_base_init,
.update = sha1_ssse3_update,
.final = sha1_ssse3_final,
.export = sha1_ssse3_export,
.import = sha1_ssse3_import,
.finup = sha1_ssse3_finup,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-ssse3",

View File

@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm13
NUM_BLKS = %rdx # 3rd arg
CTX = %rsi # 2nd arg
INP = %rdi # 1st arg
INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
SRND = %rdi # clobbers INP
SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
@ -342,8 +342,8 @@ a = TMP_
########################################################################
## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
## arg 1 : pointer to input data
## arg 2 : pointer to digest
## arg 1 : pointer to digest
## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text
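# The register reassignments above follow from the SysV AMD64 calling
# convention (arg1 in %rdi, arg2 in %rsi, arg3 in %rdx): with the
# digest passed first, the routine lines up with the block-function
# prototype the shared base layer expects, roughly
#	sha256_block_fn(struct sha256_state *sst, u8 const *src,
#			int blocks)
# so the glue code can call the assembler directly, without a
# per-driver wrapper.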

View File

@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13
X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
NUM_BLKS = %rdx # 3rd arg
CTX = %rsi # 2nd arg
INP = %rdi # 1st arg
INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
c = %ecx
d = %r8d
e = %edx # clobbers NUM_BLKS
y3 = %edi # clobbers INP
y3 = %esi # clobbers INP
TBL = %rbp
@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE
########################################################################
## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
## arg 1 : pointer to input data
## arg 2 : pointer to digest
## arg 1 : pointer to digest
## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text

View File

@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm12
NUM_BLKS = %rdx # 3rd arg
CTX = %rsi # 2nd arg
INP = %rdi # 1st arg
INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
SRND = %rdi # clobbers INP
SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
@ -348,8 +348,8 @@ a = TMP_
########################################################################
## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
## arg 1 : pointer to input data
## arg 2 : pointer to digest
## arg 1 : pointer to digest
## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text

View File

@ -36,195 +36,74 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <crypto/sha256_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>
asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
u64 rounds);
asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
u64 rounds);
asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
u64 rounds);
#endif
static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
static int sha256_ssse3_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_transform_asm(sctx->buf, sctx->state, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_transform_asm(data + done, sctx->state, (u64) rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static void (*sha256_transform_asm)(u32 *, const char *, u64);
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
if (!irq_fpu_usable() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_update(desc, data, len);
return 0;
}
/* make sure casting to sha256_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
if (!irq_fpu_usable()) {
res = crypto_sha256_update(desc, data, len);
} else {
kernel_fpu_begin();
res = __sha256_ssse3_update(desc, data, len, partial);
kernel_fpu_end();
}
kernel_fpu_begin();
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_transform_asm);
kernel_fpu_end();
return res;
return 0;
}
static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!irq_fpu_usable())
return crypto_sha256_finup(desc, data, len, out);
kernel_fpu_begin();
if (len)
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_transform_asm);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
kernel_fpu_end();
return sha256_base_finish(desc, out);
}
/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
if (!irq_fpu_usable()) {
crypto_sha256_update(desc, padding, padlen);
crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_fpu_begin();
/* We need to fill a whole block for __sha256_ssse3_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_ssse3_update(desc, padding, padlen, index);
}
__sha256_ssse3_update(desc, (const u8 *)&bits,
sizeof(bits), 56);
kernel_fpu_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha256_ssse3_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static int sha224_ssse3_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_ssse3_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
return sha256_ssse3_finup(desc, NULL, 0, out);
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_ssse3_init,
.init = sha256_base_init,
.update = sha256_ssse3_update,
.final = sha256_ssse3_final,
.export = sha256_ssse3_export,
.import = sha256_ssse3_import,
.finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ssse3",
@ -235,13 +114,11 @@ static struct shash_alg algs[] = { {
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_ssse3_init,
.init = sha224_base_init,
.update = sha256_ssse3_update,
.final = sha224_ssse3_final,
.export = sha256_ssse3_export,
.import = sha256_ssse3_import,
.final = sha256_ssse3_final,
.finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ssse3",

View File

@ -54,9 +54,9 @@
# Virtual Registers
# ARG1
msg = %rdi
digest = %rdi
# ARG2
digest = %rsi
msg = %rsi
# ARG3
msglen = %rdx
T1 = %rcx
@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
# void sha512_transform_avx(const void* M, void* D, u64 L)
# void sha512_transform_avx(void* D, const void* M, u64 L)
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.

View File

@ -70,9 +70,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
# 1st arg
INP = %rdi
CTX = %rdi
# 2nd arg
CTX = %rsi
INP = %rsi
# 3rd arg
NUM_BLKS = %rdx
@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
# void sha512_transform_rorx(const void* M, void* D, uint64_t L)#
# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.

View File

@ -53,9 +53,9 @@
# Virtual Registers
# ARG1
msg = %rdi
digest = %rdi
# ARG2
digest = %rsi
msg = %rsi
# ARG3
msglen = %rdx
T1 = %rcx
@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
# void sha512_transform_ssse3(const void* M, void* D, u64 L)#
# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.

View File

@ -34,205 +34,75 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <crypto/sha512_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>
asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
u64 rounds);
asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
u64 rounds);
asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
u64 rounds);
#endif
static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
static int sha512_ssse3_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
if (partial) {
done = SHA512_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha512_transform_asm(sctx->buf, sctx->state, 1);
}
if (len - done >= SHA512_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
sha512_transform_asm(data + done, sctx->state, (u64) rounds);
done += rounds * SHA512_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static void (*sha512_transform_asm)(u64 *, const char *, u64);
static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA512_BLOCK_SIZE) {
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
memcpy(sctx->buf + partial, data, len);
if (!irq_fpu_usable() ||
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
return crypto_sha512_update(desc, data, len);
return 0;
}
/* make sure casting to sha512_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
if (!irq_fpu_usable()) {
res = crypto_sha512_update(desc, data, len);
} else {
kernel_fpu_begin();
res = __sha512_ssse3_update(desc, data, len, partial);
kernel_fpu_end();
}
kernel_fpu_begin();
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_transform_asm);
kernel_fpu_end();
return res;
return 0;
}
static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!irq_fpu_usable())
return crypto_sha512_finup(desc, data, len, out);
kernel_fpu_begin();
if (len)
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_transform_asm);
sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
kernel_fpu_end();
return sha512_base_finish(desc, out);
}
/* Add padding and return the message digest. */
static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be64 *dst = (__be64 *)out;
__be64 bits[2];
static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128 and append length */
index = sctx->count[0] & 0x7f;
padlen = (index < 112) ? (112 - index) : ((128+112) - index);
if (!irq_fpu_usable()) {
crypto_sha512_update(desc, padding, padlen);
crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_fpu_begin();
/* We need to fill a whole block for __sha512_ssse3_update() */
if (padlen <= 112) {
sctx->count[0] += padlen;
if (sctx->count[0] < padlen)
sctx->count[1]++;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha512_ssse3_update(desc, padding, padlen, index);
}
__sha512_ssse3_update(desc, (const u8 *)&bits,
sizeof(bits), 112);
kernel_fpu_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha512_ssse3_export(struct shash_desc *desc, void *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static int sha384_ssse3_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA512_DIGEST_SIZE];
sha512_ssse3_final(desc, D);
memcpy(hash, D, SHA384_DIGEST_SIZE);
memzero_explicit(D, SHA512_DIGEST_SIZE);
return 0;
return sha512_ssse3_finup(desc, NULL, 0, out);
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_ssse3_init,
.init = sha512_base_init,
.update = sha512_ssse3_update,
.final = sha512_ssse3_final,
.export = sha512_ssse3_export,
.import = sha512_ssse3_import,
.finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-ssse3",
@ -243,13 +113,11 @@ static struct shash_alg algs[] = { {
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_ssse3_init,
.init = sha384_base_init,
.update = sha512_ssse3_update,
.final = sha384_ssse3_final,
.export = sha512_ssse3_export,
.import = sha512_ssse3_import,
.final = sha512_ssse3_final,
.finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",

View File

@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__ecb-twofish-avx",
.cra_driver_name = "__driver-ecb-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__cbc-twofish-avx",
.cra_driver_name = "__driver-cbc-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__ctr-twofish-avx",
.cra_driver_name = "__driver-ctr-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__lrw-twofish-avx",
.cra_driver_name = "__driver-lrw-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
.cra_alignmask = 0,
@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__xts-twofish-avx",
.cra_driver_name = "__driver-xts-twofish-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.cra_alignmask = 0,

View File

@ -436,6 +436,14 @@ config CRYPTO_MD5_OCTEON
MD5 message digest algorithm (RFC1321) implemented
using OCTEON crypto instructions, when available.
config CRYPTO_MD5_PPC
tristate "MD5 digest algorithm (PPC)"
depends on PPC
select CRYPTO_HASH
help
MD5 message digest algorithm (RFC1321) implemented
in PPC assembler.
config CRYPTO_MD5_SPARC64
tristate "MD5 digest algorithm (SPARC64)"
depends on SPARC64
@ -546,6 +554,15 @@ config CRYPTO_SHA512_SSSE3
Extensions version 1 (AVX1), or Advanced Vector Extensions
version 2 (AVX2) instructions, when available.
config CRYPTO_SHA1_OCTEON
tristate "SHA1 digest algorithm (OCTEON)"
depends on CPU_CAVIUM_OCTEON
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using OCTEON crypto instructions, when available.
config CRYPTO_SHA1_SPARC64
tristate "SHA1 digest algorithm (SPARC64)"
depends on SPARC64
@ -555,26 +572,6 @@ config CRYPTO_SHA1_SPARC64
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
config CRYPTO_SHA1_ARM
tristate "SHA1 digest algorithm (ARM-asm)"
depends on ARM
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM assembler.
config CRYPTO_SHA1_ARM_NEON
tristate "SHA1 digest algorithm (ARM NEON)"
depends on ARM && KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM NEON assembly, when NEON instructions are
available.
config CRYPTO_SHA1_PPC
tristate "SHA1 digest algorithm (powerpc)"
depends on PPC
@ -582,6 +579,13 @@ config CRYPTO_SHA1_PPC
This is the powerpc hardware accelerated implementation of the
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
config CRYPTO_SHA1_PPC_SPE
tristate "SHA1 digest algorithm (PPC SPE)"
depends on PPC && SPE
help
SHA-1 secure hash standard (DFIPS 180-4) implemented
using powerpc SPE SIMD instruction set.
config CRYPTO_SHA1_MB
tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
depends on X86 && 64BIT
@ -610,6 +614,24 @@ config CRYPTO_SHA256
This code also includes SHA-224, a 224 bit hash with 112 bits
of security against collision attacks.
config CRYPTO_SHA256_PPC_SPE
tristate "SHA224 and SHA256 digest algorithm (PPC SPE)"
depends on PPC && SPE
select CRYPTO_SHA256
select CRYPTO_HASH
help
SHA224 and SHA256 secure hash standard (DFIPS 180-2)
implemented using powerpc SPE SIMD instruction set.
config CRYPTO_SHA256_OCTEON
tristate "SHA224 and SHA256 digest algorithm (OCTEON)"
depends on CPU_CAVIUM_OCTEON
select CRYPTO_SHA256
select CRYPTO_HASH
help
SHA-256 secure hash standard (DFIPS 180-2) implemented
using OCTEON crypto instructions, when available.
config CRYPTO_SHA256_SPARC64
tristate "SHA224 and SHA256 digest algorithm (SPARC64)"
depends on SPARC64
@ -631,6 +653,15 @@ config CRYPTO_SHA512
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
config CRYPTO_SHA512_OCTEON
tristate "SHA384 and SHA512 digest algorithms (OCTEON)"
depends on CPU_CAVIUM_OCTEON
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
using OCTEON crypto instructions, when available.
config CRYPTO_SHA512_SPARC64
tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
depends on SPARC64
@ -640,21 +671,6 @@ config CRYPTO_SHA512_SPARC64
SHA-512 secure hash standard (DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on ARM && KERNEL_MODE_NEON
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
using ARM NEON instructions, when available.
This version of SHA implements a 512 bit hash with 256 bits of
security against collision attacks.
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
config CRYPTO_TGR192
tristate "Tiger digest algorithms"
select CRYPTO_HASH
@ -817,45 +833,18 @@ config CRYPTO_AES_SPARC64
for some popular block cipher mode is supported too, including
ECB and CBC.
config CRYPTO_AES_ARM
tristate "AES cipher algorithms (ARM-asm)"
depends on ARM
select CRYPTO_ALGAPI
select CRYPTO_AES
config CRYPTO_AES_PPC_SPE
tristate "AES cipher algorithms (PPC SPE)"
depends on PPC && SPE
help
Use optimized AES assembler routines for ARM platforms.
AES cipher algorithms (FIPS-197). AES uses the Rijndael
algorithm.
Rijndael appears to be consistently a very good performer in
both hardware and software across a wide range of computing
environments regardless of its use in feedback or non-feedback
modes. Its key setup time is excellent, and its key agility is
good. Rijndael's very low memory requirements make it very well
suited for restricted-space environments, in which it also
demonstrates excellent performance. Rijndael's operations are
among the easiest to defend against power and timing attacks.
The AES specifies three key sizes: 128, 192 and 256 bits
See <http://csrc.nist.gov/encryption/aes/> for more information.
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on ARM && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM
select CRYPTO_ABLK_HELPER
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes
Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
and for XTS mode encryption, CBC and XTS mode decryption speedup is
around 25%. (CBC encryption speed is not affected by this driver.)
This implementation does not rely on any lookup tables so it is
believed to be invulnerable to cache timing attacks.
AES cipher algorithms (FIPS-197). Additionally the acceleration
for popular block cipher modes ECB, CBC, CTR and XTS is supported.
This module should only be used for low power (router) devices
without hardware AES acceleration (e.g. caam crypto). It reduces the
size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
timing attacks. Nevertheless it might not be as secure as other
architecture-specific assembler implementations that work on 1KB
tables or 256-byte S-boxes.
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
@ -1199,7 +1188,7 @@ config CRYPTO_SERPENT_SSE2_X86_64
Keys are allowed to be from 0 to 256 bits in length, in steps
of 8 bits.
This module provides Serpent cipher algorithm that processes eigth
This module provides Serpent cipher algorithm that processes eight
blocks in parallel using the SSE2 instruction set.
See also:
@ -1523,6 +1512,15 @@ config CRYPTO_USER_API_RNG
This option enables the user-space interface for random
number generator algorithms.
config CRYPTO_USER_API_AEAD
tristate "User-space interface for AEAD cipher algorithms"
depends on NET
select CRYPTO_AEAD
select CRYPTO_USER_API
help
This option enables the user-space interface for AEAD
cipher algorithms.
config CRYPTO_HASH_INFO
bool

View File

@ -100,6 +100,7 @@ obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
#
# generic algorithms and the async_tx api

View File

@ -124,7 +124,8 @@ int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
struct cryptd_ablkcipher *cryptd_tfm;
cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);

View File

@ -64,6 +64,8 @@ static int crypto_check_alg(struct crypto_alg *alg)
if (alg->cra_priority < 0)
return -EINVAL;
atomic_set(&alg->cra_refcnt, 1);
return crypto_set_driver_name(alg);
}
@ -99,10 +101,9 @@ static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
return &n->list == stack ? top : &n->inst->alg.cra_users;
}
static void crypto_remove_spawn(struct crypto_spawn *spawn,
struct list_head *list)
static void crypto_remove_instance(struct crypto_instance *inst,
struct list_head *list)
{
struct crypto_instance *inst = spawn->inst;
struct crypto_template *tmpl = inst->tmpl;
if (crypto_is_dead(&inst->alg))
@ -167,7 +168,7 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
if (spawn->alg)
list_move(&spawn->list, &spawn->alg->cra_users);
else
crypto_remove_spawn(spawn, list);
crypto_remove_instance(spawn->inst, list);
}
}
EXPORT_SYMBOL_GPL(crypto_remove_spawns);
@ -188,7 +189,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
ret = -EEXIST;
atomic_set(&alg->cra_refcnt, 1);
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (q == alg)
goto err;
@ -523,7 +523,10 @@ int crypto_register_instance(struct crypto_template *tmpl,
err = crypto_check_alg(&inst->alg);
if (err)
goto err;
return err;
if (unlikely(!crypto_mod_get(&inst->alg)))
return -EAGAIN;
inst->alg.cra_module = tmpl->module;
inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
@ -545,37 +548,30 @@ unlock:
goto err;
crypto_wait_for_test(larval);
/* Remove instance if test failed */
if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
crypto_unregister_instance(inst);
err = 0;
err:
crypto_mod_put(&inst->alg);
return err;
}
EXPORT_SYMBOL_GPL(crypto_register_instance);
int crypto_unregister_instance(struct crypto_alg *alg)
int crypto_unregister_instance(struct crypto_instance *inst)
{
int err;
struct crypto_instance *inst = (void *)alg;
struct crypto_template *tmpl = inst->tmpl;
LIST_HEAD(users);
if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
return -EINVAL;
BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
LIST_HEAD(list);
down_write(&crypto_alg_sem);
hlist_del_init(&inst->list);
err = crypto_remove_alg(alg, &users);
crypto_remove_spawns(&inst->alg, &list, NULL);
crypto_remove_instance(inst, &list);
up_write(&crypto_alg_sem);
if (err)
return err;
tmpl->free(inst);
crypto_remove_final(&users);
crypto_remove_final(&list);
return 0;
}

crypto/algif_aead.c (new file, 666 lines)
View File

@ -0,0 +1,666 @@
/*
* algif_aead: User-space interface for AEAD algorithms
*
* Copyright (C) 2014, Stephan Mueller <smueller@chronox.de>
*
* This file provides the user-space API for AEAD ciphers.
*
* This file is derived from algif_skcipher.c.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/scatterwalk.h>
#include <crypto/if_alg.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <net/sock.h>
struct aead_sg_list {
unsigned int cur;
struct scatterlist sg[ALG_MAX_PAGES];
};
struct aead_ctx {
struct aead_sg_list tsgl;
/*
* RSGL_MAX_ENTRIES is an artificial limit where user space at maximum
* can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES
* bytes
*/
#define RSGL_MAX_ENTRIES ALG_MAX_PAGES
struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES];
void *iv;
struct af_alg_completion completion;
unsigned long used;
unsigned int len;
bool more;
bool merge;
bool enc;
size_t aead_assoclen;
struct aead_request aead_req;
};
static inline int aead_sndbuf(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
ctx->used, 0);
}
static inline bool aead_writable(struct sock *sk)
{
return PAGE_SIZE <= aead_sndbuf(sk);
}
static inline bool aead_sufficient_data(struct aead_ctx *ctx)
{
unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as)));
}
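/*
 * Worked example (hypothetical sizes): with an 8-byte AD and a
 * 16-byte auth tag, an encryption request is sufficient once
 * ctx->used >= 8, while a decryption request additionally needs the
 * tag, i.e. ctx->used >= 24.
 */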
static void aead_put_sgl(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
struct aead_sg_list *sgl = &ctx->tsgl;
struct scatterlist *sg = sgl->sg;
unsigned int i;
for (i = 0; i < sgl->cur; i++) {
if (!sg_page(sg + i))
continue;
put_page(sg_page(sg + i));
sg_assign_page(sg + i, NULL);
}
sgl->cur = 0;
ctx->used = 0;
ctx->more = 0;
ctx->merge = 0;
}
static void aead_wmem_wakeup(struct sock *sk)
{
struct socket_wq *wq;
if (!aead_writable(sk))
return;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM |
POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
static int aead_wait_for_data(struct sock *sk, unsigned flags)
{
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
long timeout;
DEFINE_WAIT(wait);
int err = -ERESTARTSYS;
if (flags & MSG_DONTWAIT)
return -EAGAIN;
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
for (;;) {
if (signal_pending(current))
break;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = MAX_SCHEDULE_TIMEOUT;
if (sk_wait_event(sk, &timeout, !ctx->more)) {
err = 0;
break;
}
}
finish_wait(sk_sleep(sk), &wait);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
return err;
}
static void aead_data_wakeup(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
struct socket_wq *wq;
if (ctx->more)
return;
if (!ctx->used)
return;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
POLLRDNORM |
POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
static int aead_sendmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
unsigned ivsize =
crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
struct aead_sg_list *sgl = &ctx->tsgl;
struct af_alg_control con = {};
long copied = 0;
bool enc = 0;
bool init = 0;
int err = -EINVAL;
if (msg->msg_controllen) {
err = af_alg_cmsg_send(msg, &con);
if (err)
return err;
init = 1;
switch (con.op) {
case ALG_OP_ENCRYPT:
enc = 1;
break;
case ALG_OP_DECRYPT:
enc = 0;
break;
default:
return -EINVAL;
}
if (con.iv && con.iv->ivlen != ivsize)
return -EINVAL;
}
lock_sock(sk);
if (!ctx->more && ctx->used)
goto unlock;
if (init) {
ctx->enc = enc;
if (con.iv)
memcpy(ctx->iv, con.iv->iv, ivsize);
ctx->aead_assoclen = con.aead_assoclen;
}
while (size) {
unsigned long len = size;
struct scatterlist *sg = NULL;
/* use the existing memory in an allocated page */
if (ctx->merge) {
sg = sgl->sg + sgl->cur - 1;
len = min_t(unsigned long, len,
PAGE_SIZE - sg->offset - sg->length);
err = memcpy_from_msg(page_address(sg_page(sg)) +
sg->offset + sg->length,
msg, len);
if (err)
goto unlock;
sg->length += len;
ctx->merge = (sg->offset + sg->length) &
(PAGE_SIZE - 1);
ctx->used += len;
copied += len;
size -= len;
continue;
}
if (!aead_writable(sk)) {
/* user space sent too much data */
aead_put_sgl(sk);
err = -EMSGSIZE;
goto unlock;
}
/* allocate a new page */
len = min_t(unsigned long, size, aead_sndbuf(sk));
while (len) {
int plen = 0;
if (sgl->cur >= ALG_MAX_PAGES) {
aead_put_sgl(sk);
err = -E2BIG;
goto unlock;
}
sg = sgl->sg + sgl->cur;
plen = min_t(int, len, PAGE_SIZE);
sg_assign_page(sg, alloc_page(GFP_KERNEL));
err = -ENOMEM;
if (!sg_page(sg))
goto unlock;
err = memcpy_from_msg(page_address(sg_page(sg)),
msg, plen);
if (err) {
__free_page(sg_page(sg));
sg_assign_page(sg, NULL);
goto unlock;
}
sg->offset = 0;
sg->length = plen;
len -= plen;
ctx->used += plen;
copied += plen;
sgl->cur++;
size -= plen;
ctx->merge = plen & (PAGE_SIZE - 1);
}
}
err = 0;
ctx->more = msg->msg_flags & MSG_MORE;
if (!ctx->more && !aead_sufficient_data(ctx)) {
aead_put_sgl(sk);
err = -EMSGSIZE;
}
unlock:
aead_data_wakeup(sk);
release_sock(sk);
return err ?: copied;
}
static ssize_t aead_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
struct aead_sg_list *sgl = &ctx->tsgl;
int err = -EINVAL;
if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE;
if (sgl->cur >= ALG_MAX_PAGES)
return -E2BIG;
lock_sock(sk);
if (!ctx->more && ctx->used)
goto unlock;
if (!size)
goto done;
if (!aead_writable(sk)) {
/* user space sent too much data */
aead_put_sgl(sk);
err = -EMSGSIZE;
goto unlock;
}
ctx->merge = 0;
get_page(page);
sg_set_page(sgl->sg + sgl->cur, page, size, offset);
sgl->cur++;
ctx->used += size;
err = 0;
done:
ctx->more = flags & MSG_MORE;
if (!ctx->more && !aead_sufficient_data(ctx)) {
aead_put_sgl(sk);
err = -EMSGSIZE;
}
unlock:
aead_data_wakeup(sk);
release_sock(sk);
return err ?: size;
}
static int aead_recvmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t ignored, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req));
unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
struct aead_sg_list *sgl = &ctx->tsgl;
struct scatterlist *sg = NULL;
struct scatterlist assoc[ALG_MAX_PAGES];
size_t assoclen = 0;
unsigned int i = 0;
int err = -EINVAL;
unsigned long used = 0;
size_t outlen = 0;
size_t usedpages = 0;
unsigned int cnt = 0;
/* Limit number of IOV blocks to be accessed below */
if (msg->msg_iter.nr_segs > RSGL_MAX_ENTRIES)
return -ENOMSG;
lock_sock(sk);
/*
* AEAD memory structure: For encryption, the tag is appended to the
* ciphertext which implies that the memory allocated for the ciphertext
* must be increased by the tag length. For decryption, the tag
* is expected to be concatenated to the ciphertext. The plaintext
* therefore has a memory size of the ciphertext minus the tag length.
*
* The memory structure for cipher operation has the following
* structure:
* AEAD encryption input: assoc data || plaintext
* AEAD encryption output: ciphertext || auth tag
* AEAD decryption input: assoc data || ciphertext || auth tag
* AEAD decryption output: plaintext
*/
if (ctx->more) {
err = aead_wait_for_data(sk, flags);
if (err)
goto unlock;
}
used = ctx->used;
/*
* Make sure sufficient data is present -- note, the same check is
* also present in sendmsg/sendpage. The checks in sendpage/sendmsg
* shall inform the data sender that something is wrong, but they are
* irrelevant for maintaining the kernel integrity.
* We need this check here too in case user space decides to not honor
* the error message in sendmsg/sendpage and still call recvmsg. This
* check here protects the kernel integrity.
*/
if (!aead_sufficient_data(ctx))
goto unlock;
/*
* The cipher operation input data is reduced by the associated data
* length as this data is processed separately later on.
*/
used -= ctx->aead_assoclen;
if (ctx->enc) {
/* round up output buffer to multiple of block size */
outlen = ((used + bs - 1) / bs * bs);
/* add the size needed for the auth tag to be created */
outlen += as;
} else {
/* output data size is input without the authentication tag */
outlen = used - as;
/* round up output buffer to multiple of block size */
outlen = ((outlen + bs - 1) / bs * bs);
}
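/*
 * Worked example (hypothetical sizes): gcm(aes) has bs == 1 and a
 * 16-byte tag, so encrypting 32 bytes of plaintext needs
 * outlen = 32 + 16 = 48, while decrypting those 48 bytes of
 * ciphertext || tag yields outlen = 48 - 16 = 32.
 */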
/* convert iovecs of output buffers into scatterlists */
while (iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
/* make one iovec available as scatterlist */
err = af_alg_make_sg(&ctx->rsgl[cnt], &msg->msg_iter,
seglen);
if (err < 0)
goto unlock;
usedpages += err;
/* chain the new scatterlist with initial list */
if (cnt)
scatterwalk_crypto_chain(ctx->rsgl[0].sg,
ctx->rsgl[cnt].sg, 1,
sg_nents(ctx->rsgl[cnt-1].sg));
/* we do not need more iovecs as we have sufficient memory */
if (outlen <= usedpages)
break;
iov_iter_advance(&msg->msg_iter, err);
cnt++;
}
err = -EINVAL;
/* ensure output buffer is sufficiently large */
if (usedpages < outlen)
goto unlock;
sg_init_table(assoc, ALG_MAX_PAGES);
assoclen = ctx->aead_assoclen;
/*
* Split scatterlist into two: first part becomes AD, second part
* is plaintext / ciphertext. The first part is assigned to assoc
* scatterlist. When this loop finishes, sg points to the start of the
* plaintext / ciphertext.
*/
for (i = 0; i < ctx->tsgl.cur; i++) {
sg = sgl->sg + i;
if (sg->length <= assoclen) {
/* AD is larger than one page */
sg_set_page(assoc + i, sg_page(sg),
sg->length, sg->offset);
assoclen -= sg->length;
if (i >= ctx->tsgl.cur)
goto unlock;
} else if (!assoclen) {
/* current page is the start of plaintext / ciphertext */
if (i)
/* AD terminates at page boundary */
sg_mark_end(assoc + i - 1);
else
/* AD size is zero */
sg_mark_end(assoc);
break;
} else {
/* AD does not terminate at page boundary */
sg_set_page(assoc + i, sg_page(sg),
assoclen, sg->offset);
sg_mark_end(assoc + i);
/* plaintext / ciphertext starts after AD */
sg->length -= assoclen;
sg->offset += assoclen;
break;
}
}
aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen);
aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl[0].sg, used,
ctx->iv);
err = af_alg_wait_for_completion(ctx->enc ?
crypto_aead_encrypt(&ctx->aead_req) :
crypto_aead_decrypt(&ctx->aead_req),
&ctx->completion);
if (err) {
/* EBADMSG implies a valid cipher operation took place */
if (err == -EBADMSG)
aead_put_sgl(sk);
goto unlock;
}
aead_put_sgl(sk);
err = 0;
unlock:
for (i = 0; i < cnt; i++)
af_alg_free_sg(&ctx->rsgl[i]);
aead_wmem_wakeup(sk);
release_sock(sk);
return err ? err : outlen;
}
static unsigned int aead_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
unsigned int mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
if (!ctx->more)
mask |= POLLIN | POLLRDNORM;
if (aead_writable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
return mask;
}
static struct proto_ops algif_aead_ops = {
.family = PF_ALG,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.getname = sock_no_getname,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.getsockopt = sock_no_getsockopt,
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.accept = sock_no_accept,
.setsockopt = sock_no_setsockopt,
.release = af_alg_release,
.sendmsg = aead_sendmsg,
.sendpage = aead_sendpage,
.recvmsg = aead_recvmsg,
.poll = aead_poll,
};
static void *aead_bind(const char *name, u32 type, u32 mask)
{
return crypto_alloc_aead(name, type, mask);
}
static void aead_release(void *private)
{
crypto_free_aead(private);
}
static int aead_setauthsize(void *private, unsigned int authsize)
{
return crypto_aead_setauthsize(private, authsize);
}
static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
{
return crypto_aead_setkey(private, key, keylen);
}
static void aead_sock_destruct(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct aead_ctx *ctx = ask->private;
unsigned int ivlen = crypto_aead_ivsize(
crypto_aead_reqtfm(&ctx->aead_req));
aead_put_sgl(sk);
sock_kzfree_s(sk, ctx->iv, ivlen);
sock_kfree_s(sk, ctx, ctx->len);
af_alg_release_parent(sk);
}
static int aead_accept_parent(void *private, struct sock *sk)
{
struct aead_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private);
unsigned int ivlen = crypto_aead_ivsize(private);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
memset(ctx, 0, len);
ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL);
if (!ctx->iv) {
sock_kfree_s(sk, ctx, len);
return -ENOMEM;
}
memset(ctx->iv, 0, ivlen);
ctx->len = len;
ctx->used = 0;
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
ctx->tsgl.cur = 0;
ctx->aead_assoclen = 0;
af_alg_init_completion(&ctx->completion);
sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
ask->private = ctx;
aead_request_set_tfm(&ctx->aead_req, private);
aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
af_alg_complete, &ctx->completion);
sk->sk_destruct = aead_sock_destruct;
return 0;
}
static const struct af_alg_type algif_type_aead = {
.bind = aead_bind,
.release = aead_release,
.setkey = aead_setkey,
.setauthsize = aead_setauthsize,
.accept = aead_accept_parent,
.ops = &algif_aead_ops,
.name = "aead",
.owner = THIS_MODULE
};
static int __init algif_aead_init(void)
{
return af_alg_register_type(&algif_type_aead);
}
static void __exit algif_aead_exit(void)
{
int err = af_alg_unregister_type(&algif_type_aead);
BUG_ON(err);
}
module_init(algif_aead_init);
module_exit(algif_aead_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
MODULE_DESCRIPTION("AEAD kernel crypto API user space interface");
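A minimal user-space sketch of this interface, for orientation only: it assumes gcm(aes) is available, uses hypothetical sizes (16-byte key and tag, 12-byte IV, zeroed buffers, no associated data) and elides all error handling.

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type = "aead",
		.salg_name = "gcm(aes)",
	};
	unsigned char key[16] = { 0 }, pt[32] = { 0 };
	unsigned char ct[32 + 16];		/* ciphertext || tag */
	char cbuf[CMSG_SPACE(4) + CMSG_SPACE(4 + 12)] = { 0 };
	struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
	struct msghdr msg = {
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	struct af_alg_iv *ivp;
	struct cmsghdr *cmsg;
	int tfm, op;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	/* the tag length travels in optlen, not optval */
	setsockopt(tfm, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, 16);
	op = accept(tfm, NULL, 0);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_OP;
	cmsg->cmsg_len = CMSG_LEN(4);
	*(unsigned int *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_IV;
	cmsg->cmsg_len = CMSG_LEN(4 + 12);
	ivp = (struct af_alg_iv *)CMSG_DATA(cmsg);
	ivp->ivlen = 12;
	memset(ivp->iv, 0, 12);

	sendmsg(op, &msg, 0);			/* plaintext in */
	read(op, ct, sizeof(ct));		/* ciphertext || tag out */

	close(op);
	close(tfm);
	return 0;
}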

View File

@ -87,7 +87,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
return genlen;
err = memcpy_to_msg(msg, result, len);
memzero_explicit(result, genlen);
memzero_explicit(result, len);
return err ? err : len;
}

View File

@ -210,7 +210,11 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx,
byte_count = DEFAULT_BLK_SZ;
}
err = byte_count;
/*
* Return 0 in case of success as mandated by the kernel
* crypto API interface definition.
*/
err = 0;
dbgprint(KERN_CRIT "getting %d random bytes for context %p\n",
byte_count, ctx);

View File

@ -257,6 +257,16 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
mask |= CRYPTO_ALG_TESTED;
}
/*
* If the internal flag is set for a cipher, require a caller to
* invoke the cipher with the internal flag to use that cipher.
* Also, if a caller wants to allocate a cipher that may or may
* not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and
* !(mask & CRYPTO_ALG_INTERNAL).
*/
if (!((type | mask) & CRYPTO_ALG_INTERNAL))
mask |= CRYPTO_ALG_INTERNAL;
larval = crypto_larval_lookup(name, type, mask);
if (IS_ERR(larval) || !crypto_is_larval(larval))
return larval;
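/*
 * Callers therefore fall into three patterns (sketch, mirroring the
 * comment above):
 *
 *	crypto_alloc_ahash("sha1", 0, 0);
 *		- internal implementations are masked out
 *	mcryptd_alloc_ahash("__intel_sha1-mb", CRYPTO_ALG_INTERNAL,
 *			    CRYPTO_ALG_INTERNAL);
 *		- explicitly requests an internal implementation
 *	crypto_alloc_ahash(name, type | CRYPTO_ALG_INTERNAL,
 *			   mask & ~CRYPTO_ALG_INTERNAL);
 *		- accepts either kind
 */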

View File

@ -168,6 +168,20 @@ static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
return ictx->queue;
}
static inline void cryptd_check_internal(struct rtattr **tb, u32 *type,
u32 *mask)
{
struct crypto_attr_type *algt;
algt = crypto_get_attr_type(tb);
if (IS_ERR(algt))
return;
if ((algt->type & CRYPTO_ALG_INTERNAL))
*type |= CRYPTO_ALG_INTERNAL;
if ((algt->mask & CRYPTO_ALG_INTERNAL))
*mask |= CRYPTO_ALG_INTERNAL;
}
static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
const u8 *key, unsigned int keylen)
{
@ -321,10 +335,13 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
struct cryptd_instance_ctx *ctx;
struct crypto_instance *inst;
struct crypto_alg *alg;
u32 type = CRYPTO_ALG_TYPE_BLKCIPHER;
u32 mask = CRYPTO_ALG_TYPE_MASK;
int err;
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
CRYPTO_ALG_TYPE_MASK);
cryptd_check_internal(tb, &type, &mask);
alg = crypto_get_attr_alg(tb, type, mask);
if (IS_ERR(alg))
return PTR_ERR(alg);
@ -341,7 +358,10 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
if (err)
goto out_free_inst;
inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_INTERNAL;
inst->alg.cra_flags = type;
inst->alg.cra_type = &crypto_ablkcipher_type;
inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize;
@ -577,9 +597,13 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
struct ahash_instance *inst;
struct shash_alg *salg;
struct crypto_alg *alg;
u32 type = 0;
u32 mask = 0;
int err;
salg = shash_attr_alg(tb[1], 0, 0);
cryptd_check_internal(tb, &type, &mask);
salg = shash_attr_alg(tb[1], type, mask);
if (IS_ERR(salg))
return PTR_ERR(salg);
@ -598,7 +622,10 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
if (err)
goto out_free_inst;
inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC;
type = CRYPTO_ALG_ASYNC;
if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_INTERNAL;
inst->alg.halg.base.cra_flags = type;
inst->alg.halg.digestsize = salg->digestsize;
inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);
@ -719,10 +746,13 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
struct aead_instance_ctx *ctx;
struct crypto_instance *inst;
struct crypto_alg *alg;
u32 type = CRYPTO_ALG_TYPE_AEAD;
u32 mask = CRYPTO_ALG_TYPE_MASK;
int err;
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_AEAD,
CRYPTO_ALG_TYPE_MASK);
cryptd_check_internal(tb, &type, &mask);
alg = crypto_get_attr_alg(tb, type, mask);
if (IS_ERR(alg))
return PTR_ERR(alg);
@ -739,7 +769,10 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
if (err)
goto out_free_inst;
inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_INTERNAL;
inst->alg.cra_flags = type;
inst->alg.cra_type = alg->cra_type;
inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
inst->alg.cra_init = cryptd_aead_init_tfm;

View File

@ -62,10 +62,14 @@ static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
else if (!exact)
match = !strcmp(q->cra_name, p->cru_name);
if (match) {
alg = q;
break;
}
if (!match)
continue;
if (unlikely(!crypto_mod_get(q)))
continue;
alg = q;
break;
}
up_read(&crypto_alg_sem);
@ -205,9 +209,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
if (!alg)
return -ENOENT;
err = -ENOMEM;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
goto drop_alg;
info.in_skb = in_skb;
info.out_skb = skb;
@ -215,6 +220,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
info.nlmsg_flags = 0;
err = crypto_report_alg(alg, &info);
drop_alg:
crypto_mod_put(alg);
if (err)
return err;
@ -284,6 +293,7 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
up_write(&crypto_alg_sem);
crypto_mod_put(alg);
crypto_remove_final(&list);
return 0;
@ -294,6 +304,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct crypto_alg *alg;
struct crypto_user_alg *p = nlmsg_data(nlh);
int err;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
@ -310,13 +321,19 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
* if we try to unregister. Unregistering such an algorithm without
* removing the module is not possible, so we restrict to crypto
* instances that are built from templates. */
err = -EINVAL;
if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
return -EINVAL;
goto drop_alg;
if (atomic_read(&alg->cra_refcnt) != 1)
return -EBUSY;
err = -EBUSY;
if (atomic_read(&alg->cra_refcnt) > 2)
goto drop_alg;
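/*
 * The baseline is now two references: one held since registration
 * and one taken by crypto_alg_match() via crypto_mod_get() above,
 * so anything beyond that means the instance is still in use.
 */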
return crypto_unregister_instance(alg);
err = crypto_unregister_instance((struct crypto_instance *)alg);
drop_alg:
crypto_mod_put(alg);
return err;
}
static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type,
@ -395,8 +412,10 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
alg = crypto_alg_match(p, exact);
if (alg)
if (alg) {
crypto_mod_put(alg);
return -EEXIST;
}
if (strlen(p->cru_driver_name))
name = p->cru_driver_name;

View File

@ -119,19 +119,19 @@ static const struct drbg_core drbg_cores[] = {
.statelen = 32, /* 256 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes128",
.backend_cra_name = "ecb(aes)",
.backend_cra_name = "aes",
}, {
.flags = DRBG_CTR | DRBG_STRENGTH192,
.statelen = 40, /* 320 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes192",
.backend_cra_name = "ecb(aes)",
.backend_cra_name = "aes",
}, {
.flags = DRBG_CTR | DRBG_STRENGTH256,
.statelen = 48, /* 384 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes256",
.backend_cra_name = "ecb(aes)",
.backend_cra_name = "aes",
},
#endif /* CONFIG_CRYPTO_DRBG_CTR */
#ifdef CONFIG_CRYPTO_DRBG_HASH
@ -308,9 +308,6 @@ static int drbg_ctr_bcc(struct drbg_state *drbg,
drbg_string_fill(&data, out, drbg_blocklen(drbg));
/* 10.4.3 step 1 */
memset(out, 0, drbg_blocklen(drbg));
/* 10.4.3 step 2 / 4 */
list_for_each_entry(curr, in, list) {
const unsigned char *pos = curr->buf;
@ -406,7 +403,6 @@ static int drbg_ctr_df(struct drbg_state *drbg,
memset(pad, 0, drbg_blocklen(drbg));
memset(iv, 0, drbg_blocklen(drbg));
memset(temp, 0, drbg_statelen(drbg));
/* 10.4.2 step 1 is implicit as we work byte-wise */
@ -523,7 +519,6 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed,
unsigned int len = 0;
struct drbg_string cipherin;
memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg));
if (3 > reseed)
memset(df_data, 0, drbg_statelen(drbg));
@ -585,8 +580,6 @@ static int drbg_ctr_generate(struct drbg_state *drbg,
int ret = 0;
struct drbg_string data;
memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
/* 10.2.1.5.2 step 2 */
if (addtl && !list_empty(addtl)) {
ret = drbg_ctr_update(drbg, addtl, 2);
@ -761,7 +754,6 @@ static struct drbg_state_ops drbg_hmac_ops = {
.generate = drbg_hmac_generate,
.crypto_init = drbg_init_hash_kernel,
.crypto_fini = drbg_fini_hash_kernel,
};
#endif /* CONFIG_CRYPTO_DRBG_HMAC */
@ -838,8 +830,6 @@ static int drbg_hash_df(struct drbg_state *drbg,
unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg);
struct drbg_string data;
memset(tmp, 0, drbg_blocklen(drbg));
/* 10.4.1 step 3 */
input[0] = 1;
drbg_cpu_to_be32((outlen * 8), &input[1]);
@ -879,7 +869,6 @@ static int drbg_hash_update(struct drbg_state *drbg, struct list_head *seed,
unsigned char *V = drbg->scratchpad;
unsigned char prefix = DRBG_PREFIX1;
memset(drbg->scratchpad, 0, drbg_statelen(drbg));
if (!seed)
return -EINVAL;
@ -921,9 +910,6 @@ static int drbg_hash_process_addtl(struct drbg_state *drbg,
LIST_HEAD(datalist);
unsigned char prefix = DRBG_PREFIX2;
/* this is value w as per documentation */
memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
/* 10.1.1.4 step 2 */
if (!addtl || list_empty(addtl))
return 0;
@ -959,9 +945,6 @@ static int drbg_hash_hashgen(struct drbg_state *drbg,
struct drbg_string data;
LIST_HEAD(datalist);
memset(src, 0, drbg_statelen(drbg));
memset(dst, 0, drbg_blocklen(drbg));
/* 10.1.1.4 step hashgen 2 */
memcpy(src, drbg->V, drbg_statelen(drbg));
@ -1018,7 +1001,6 @@ static int drbg_hash_generate(struct drbg_state *drbg,
len = drbg_hash_hashgen(drbg, buf, buflen);
/* this is the value H as documented in 10.1.1.4 */
memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
/* 10.1.1.4 step 4 */
drbg_string_fill(&data1, &prefix, 1);
list_add_tail(&data1.list, &datalist);
@ -1298,7 +1280,7 @@ static void drbg_restore_shadow(struct drbg_state *drbg,
* as defined in SP800-90A. The additional input is mixed into
* the state in addition to the pulled entropy.
*
* return: generated number of bytes
* return: 0 when all bytes are generated; < 0 in case of an error
*/
static int drbg_generate(struct drbg_state *drbg,
unsigned char *buf, unsigned int buflen,
@ -1437,6 +1419,11 @@ static int drbg_generate(struct drbg_state *drbg,
}
#endif
/*
* All operations were successful, return 0 as mandated by
* the kernel crypto API interface.
*/
len = 0;
err:
shadow->d_ops->crypto_fini(shadow);
drbg_restore_shadow(drbg, &shadow);
@ -1644,24 +1631,24 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, const unsigned char *key,
static int drbg_init_sym_kernel(struct drbg_state *drbg)
{
int ret = 0;
struct crypto_blkcipher *tfm;
struct crypto_cipher *tfm;
tfm = crypto_alloc_blkcipher(drbg->core->backend_cra_name, 0, 0);
tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0);
if (IS_ERR(tfm)) {
pr_info("DRBG: could not allocate cipher TFM handle\n");
return PTR_ERR(tfm);
}
BUG_ON(drbg_blocklen(drbg) != crypto_blkcipher_blocksize(tfm));
BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm));
drbg->priv_data = tfm;
return ret;
}
static int drbg_fini_sym_kernel(struct drbg_state *drbg)
{
struct crypto_blkcipher *tfm =
(struct crypto_blkcipher *)drbg->priv_data;
struct crypto_cipher *tfm =
(struct crypto_cipher *)drbg->priv_data;
if (tfm)
crypto_free_blkcipher(tfm);
crypto_free_cipher(tfm);
drbg->priv_data = NULL;
return 0;
}
@ -1669,21 +1656,14 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg)
static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key,
unsigned char *outval, const struct drbg_string *in)
{
int ret = 0;
struct scatterlist sg_in, sg_out;
struct blkcipher_desc desc;
struct crypto_blkcipher *tfm =
(struct crypto_blkcipher *)drbg->priv_data;
struct crypto_cipher *tfm =
(struct crypto_cipher *)drbg->priv_data;
desc.tfm = tfm;
desc.flags = 0;
crypto_blkcipher_setkey(tfm, key, (drbg_keylen(drbg)));
crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg)));
/* there is only one component in *in */
sg_init_one(&sg_in, in->buf, in->len);
sg_init_one(&sg_out, outval, drbg_blocklen(drbg));
ret = crypto_blkcipher_encrypt(&desc, &sg_out, &sg_in, in->len);
return ret;
BUG_ON(in->len < drbg_blocklen(drbg));
crypto_cipher_encrypt_one(tfm, outval, in->buf);
return 0;
}
#endif /* CONFIG_CRYPTO_DRBG_CTR */
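
Taken together, the drbg.c changes above switch the CTR backend from a blkcipher to a single-block cipher and make the generate path return 0 on success. A kernel-side consumer therefore checks for a negative error rather than a byte count; a minimal sketch, assuming the registered name "drbg_nopr_ctr_aes192" and the placeholder function example_get_random:

#include <crypto/rng.h>

static int example_get_random(u8 *buf, unsigned int len)
{
	struct crypto_rng *drng;
	int ret;

	drng = crypto_alloc_rng("drbg_nopr_ctr_aes192", 0, 0);
	if (IS_ERR(drng))
		return PTR_ERR(drng);

	/* seed from the kernel's entropy sources (no caller-supplied seed) */
	ret = crypto_rng_reset(drng, NULL, 0);
	if (!ret)
		ret = crypto_rng_get_bytes(drng, buf, len); /* 0 on success */

	crypto_free_rng(drng);
	return ret;
}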

View File

@ -258,6 +258,20 @@ out_free_inst:
goto out;
}
static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type,
u32 *mask)
{
struct crypto_attr_type *algt;
algt = crypto_get_attr_type(tb);
if (IS_ERR(algt))
return;
if ((algt->type & CRYPTO_ALG_INTERNAL))
*type |= CRYPTO_ALG_INTERNAL;
if ((algt->mask & CRYPTO_ALG_INTERNAL))
*mask |= CRYPTO_ALG_INTERNAL;
}
static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@ -480,9 +494,13 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
struct ahash_instance *inst;
struct shash_alg *salg;
struct crypto_alg *alg;
u32 type = 0;
u32 mask = 0;
int err;
salg = shash_attr_alg(tb[1], 0, 0);
mcryptd_check_internal(tb, &type, &mask);
salg = shash_attr_alg(tb[1], type, mask);
if (IS_ERR(salg))
return PTR_ERR(salg);
@ -502,7 +520,10 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
if (err)
goto out_free_inst;
inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC;
type = CRYPTO_ALG_ASYNC;
if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_INTERNAL;
inst->alg.halg.base.cra_flags = type;
inst->alg.halg.digestsize = salg->digestsize;
inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
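
The mcryptd change above propagates CRYPTO_ALG_INTERNAL from the wrapped shash into the instance so that helper-only implementations stay invisible to normal allocations. The lookup-side rule it relies on can be condensed to the following sketch (an illustration of the behavior this series introduces in api.c, not the literal hunk):

/*
 * An algorithm flagged CRYPTO_ALG_INTERNAL is only returned when the
 * caller sets the INTERNAL bit in the type; otherwise the lookup adds
 * the bit to the mask and thereby demands that it be clear.
 */
if (!((type | mask) & CRYPTO_ALG_INTERNAL))
	mask |= CRYPTO_ALG_INTERNAL;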

View File

@ -89,6 +89,9 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "selftest : %s\n",
(alg->cra_flags & CRYPTO_ALG_TESTED) ?
"passed" : "unknown");
seq_printf(m, "internal : %s\n",
(alg->cra_flags & CRYPTO_ALG_INTERNAL) ?
"yes" : "no");
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
seq_printf(m, "type : larval\n");

View File

@ -23,111 +23,49 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <asm/byteorder.h>
static int sha1_init(struct shash_desc *desc)
static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
int blocks)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
u32 temp[SHA_WORKSPACE_WORDS];
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
while (blocks--) {
sha_transform(sst->state, src, temp);
src += SHA1_BLOCK_SIZE;
}
memzero_explicit(temp, sizeof(temp));
}
int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial, done;
const u8 *src;
partial = sctx->count % SHA1_BLOCK_SIZE;
sctx->count += len;
done = 0;
src = data;
if ((partial + len) >= SHA1_BLOCK_SIZE) {
u32 temp[SHA_WORKSPACE_WORDS];
if (partial) {
done = -partial;
memcpy(sctx->buffer + partial, data,
done + SHA1_BLOCK_SIZE);
src = sctx->buffer;
}
do {
sha_transform(sctx->state, src, temp);
done += SHA1_BLOCK_SIZE;
src = data + done;
} while (done + SHA1_BLOCK_SIZE <= len);
memzero_explicit(temp, sizeof(temp));
partial = 0;
}
memcpy(sctx->buffer + partial, src, len - done);
return 0;
return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha1_update);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
u32 i, index, padlen;
__be64 bits;
static const u8 padding[64] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 */
index = sctx->count & 0x3f;
padlen = (index < 56) ? (56 - index) : ((64+56) - index);
crypto_sha1_update(desc, padding, padlen);
/* Append length */
crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof *sctx);
return 0;
sha1_base_do_finalize(desc, sha1_generic_block_fn);
return sha1_base_finish(desc, out);
}
static int sha1_export(struct shash_desc *desc, void *out)
int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
return sha1_final(desc, out);
}
EXPORT_SYMBOL(crypto_sha1_finup);
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
.init = sha1_base_init,
.update = crypto_sha1_update,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
.finup = crypto_sha1_finup,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-generic",
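
The conversion above shows the new base-layer contract: an implementation now supplies only a block function, while buffering, padding and length encoding live in the shared sha1_base helpers. Schematically (the my_* names below are placeholders, not code from this diff; the helper names come from the new <crypto/sha1_base.h>):

static void my_block_fn(struct sha1_state *sst, u8 const *src, int blocks)
{
	while (blocks--) {
		/* compress one SHA1_BLOCK_SIZE chunk into sst->state */
		src += SHA1_BLOCK_SIZE;
	}
}

/* update/finup/final then collapse to thin wrappers: */
sha1_base_do_update(desc, data, len, my_block_fn);
sha1_base_do_finalize(desc, my_block_fn);
return sha1_base_finish(desc, out);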

View File

@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
@ -214,138 +215,43 @@ static void sha256_transform(u32 *state, const u8 *input)
memzero_explicit(W, 64 * sizeof(u32));
}
static int sha224_init(struct shash_desc *desc)
static void sha256_generic_block_fn(struct sha256_state *sst, u8 const *src,
int blocks)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
while (blocks--) {
sha256_transform(sst->state, src);
src += SHA256_BLOCK_SIZE;
}
}
int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial, done;
const u8 *src;
partial = sctx->count & 0x3f;
sctx->count += len;
done = 0;
src = data;
if ((partial + len) > 63) {
if (partial) {
done = -partial;
memcpy(sctx->buf + partial, data, done + 64);
src = sctx->buf;
}
do {
sha256_transform(sctx->state, src);
done += 64;
src = data + done;
} while (done + 63 < len);
partial = 0;
}
memcpy(sctx->buf + partial, src, len - done);
return 0;
return sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha256_update);
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
__be64 bits;
unsigned int index, pad_len;
int i;
static const u8 padding[64] = { 0x80, };
/* Save number of bits */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64. */
index = sctx->count & 0x3f;
pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
crypto_sha256_update(desc, padding, pad_len);
/* Append length (before padding) */
crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(*sctx));
return 0;
sha256_base_do_finalize(desc, sha256_generic_block_fn);
return sha256_base_finish(desc, out);
}
static int sha224_final(struct shash_desc *desc, u8 *hash)
int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
static int sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
return sha256_final(desc, hash);
}
EXPORT_SYMBOL(crypto_sha256_finup);
static struct shash_alg sha256_algs[2] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_init,
.init = sha256_base_init,
.update = crypto_sha256_update,
.final = sha256_final,
.export = sha256_export,
.import = sha256_import,
.finup = crypto_sha256_finup,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name= "sha256-generic",
@ -355,9 +261,10 @@ static struct shash_alg sha256_algs[2] = { {
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_init,
.init = sha224_base_init,
.update = crypto_sha256_update,
.final = sha224_final,
.final = sha256_final,
.finup = crypto_sha256_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",

View File

@ -18,6 +18,7 @@
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/percpu.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
@ -130,125 +131,42 @@ sha512_transform(u64 *state, const u8 *input)
a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
static int
sha512_init(struct shash_desc *desc)
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
int blocks)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int
sha384_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
while (blocks--) {
sha512_transform(sst->state, src);
src += SHA512_BLOCK_SIZE;
}
}
int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, part_len;
/* Compute number of bytes mod 128 */
index = sctx->count[0] & 0x7f;
/* Update number of bytes */
if ((sctx->count[0] += len) < len)
sctx->count[1]++;
part_len = 128 - index;
/* Transform as many times as possible. */
if (len >= part_len) {
memcpy(&sctx->buf[index], data, part_len);
sha512_transform(sctx->state, sctx->buf);
for (i = part_len; i + 127 < len; i+=128)
sha512_transform(sctx->state, &data[i]);
index = 0;
} else {
i = 0;
}
/* Buffer remaining input */
memcpy(&sctx->buf[index], &data[i], len - i);
return 0;
return sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha512_update);
static int
sha512_final(struct shash_desc *desc, u8 *hash)
static int sha512_final(struct shash_desc *desc, u8 *hash)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
static u8 padding[128] = { 0x80, };
__be64 *dst = (__be64 *)hash;
__be64 bits[2];
unsigned int index, pad_len;
int i;
/* Save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128. */
index = sctx->count[0] & 0x7f;
pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
crypto_sha512_update(desc, padding, pad_len);
/* Append length (before padding) */
crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits));
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(struct sha512_state));
return 0;
sha512_base_do_finalize(desc, sha512_generic_block_fn);
return sha512_base_finish(desc, hash);
}
static int sha384_final(struct shash_desc *desc, u8 *hash)
int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash)
{
u8 D[64];
sha512_final(desc, D);
memcpy(hash, D, 48);
memzero_explicit(D, 64);
return 0;
sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
return sha512_final(desc, hash);
}
EXPORT_SYMBOL(crypto_sha512_finup);
static struct shash_alg sha512_algs[2] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_init,
.init = sha512_base_init,
.update = crypto_sha512_update,
.final = sha512_final,
.finup = crypto_sha512_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
@ -259,9 +177,10 @@ static struct shash_alg sha512_algs[2] = { {
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_init,
.init = sha384_base_init,
.update = crypto_sha512_update,
.final = sha384_final,
.final = sha512_final,
.finup = crypto_sha512_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",

View File

@ -1155,9 +1155,9 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs,
goto out_free_req;
}
sg_init_table(sg, TVMEMSIZE);
k = *keysize + *b_size;
sg_init_table(sg, DIV_ROUND_UP(k, PAGE_SIZE));
if (k > PAGE_SIZE) {
sg_set_buf(sg, tvmem[0] + *keysize,
PAGE_SIZE - *keysize);

View File

@ -1474,11 +1474,11 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
for (j = 0; j < template[i].loops; j++) {
err = crypto_rng_get_bytes(tfm, result,
template[i].rlen);
if (err != template[i].rlen) {
if (err < 0) {
printk(KERN_ERR "alg: cprng: Failed to obtain "
"the correct amount of random data for "
"%s (requested %d, got %d)\n", algo,
template[i].rlen, err);
"%s (requested %d)\n", algo,
template[i].rlen);
goto out;
}
}
@ -1505,7 +1505,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
struct crypto_aead *tfm;
int err = 0;
tfm = crypto_alloc_aead(driver, type, mask);
tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@ -1534,7 +1534,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
struct crypto_cipher *tfm;
int err = 0;
tfm = crypto_alloc_cipher(driver, type, mask);
tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: cipher: Failed to load transform for "
"%s: %ld\n", driver, PTR_ERR(tfm));
@ -1563,7 +1563,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
struct crypto_ablkcipher *tfm;
int err = 0;
tfm = crypto_alloc_ablkcipher(driver, type, mask);
tfm = crypto_alloc_ablkcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: skcipher: Failed to load transform for "
"%s: %ld\n", driver, PTR_ERR(tfm));
@ -1636,7 +1636,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
struct crypto_ahash *tfm;
int err;
tfm = crypto_alloc_ahash(driver, type, mask);
tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@ -1664,7 +1664,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
if (err)
goto out;
tfm = crypto_alloc_shash(driver, type, mask);
tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@ -1706,7 +1706,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
struct crypto_rng *rng;
int err;
rng = crypto_alloc_rng(driver, type, mask);
rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(rng)) {
printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(rng));
@ -1733,7 +1733,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
if (!buf)
return -ENOMEM;
drng = crypto_alloc_rng(driver, type, mask);
drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(drng)) {
printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
"%s\n", driver);
@ -1759,7 +1759,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
ret = crypto_drbg_get_bytes_addtl(drng,
buf, test->expectedlen, &addtl);
}
if (ret <= 0) {
if (ret < 0) {
printk(KERN_ERR "alg: drbg: could not obtain random data for "
"driver %s\n", driver);
goto outbuf;
@ -1774,7 +1774,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
ret = crypto_drbg_get_bytes_addtl(drng,
buf, test->expectedlen, &addtl);
}
if (ret <= 0) {
if (ret < 0) {
printk(KERN_ERR "alg: drbg: could not obtain random data for "
"driver %s\n", driver);
goto outbuf;
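
Note the allocation pattern above: the test manager now passes type | CRYPTO_ALG_INTERNAL, which tells the lookup not to force the INTERNAL bit into the mask, so the self-tests can still reach internal helper implementations that ordinary callers never see. For illustration only (driver/type/mask as in the surrounding code):

/* ordinary caller: internal-only helpers are filtered out */
tfm = crypto_alloc_ahash("sha1", 0, 0);

/* testmgr: may resolve to an internal implementation as well */
tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);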

View File

@ -101,6 +101,19 @@ config HW_RANDOM_BCM2835
If unsure, say Y.
config HW_RANDOM_IPROC_RNG200
tristate "Broadcom iProc RNG200 support"
depends on ARCH_BCM_IPROC
default HW_RANDOM
---help---
This driver provides kernel-side support for the RNG200
hardware found on the Broadcom iProc SoCs.
To compile this driver as a module, choose M here: the
module will be called iproc-rng200
If unsure, say Y.
config HW_RANDOM_GEODE
tristate "AMD Geode HW Random Number Generator support"
depends on X86_32 && PCI

View File

@ -28,5 +28,6 @@ obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o
obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o
obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o

View File

@ -13,24 +13,37 @@
#include <linux/platform_device.h>
#include <linux/hw_random.h>
#include <bcm63xx_io.h>
#include <bcm63xx_regs.h>
#define RNG_CTRL 0x00
#define RNG_EN (1 << 0)
#define RNG_STAT 0x04
#define RNG_AVAIL_MASK (0xff000000)
#define RNG_DATA 0x08
#define RNG_THRES 0x0c
#define RNG_MASK 0x10
struct bcm63xx_rng_priv {
struct hwrng rng;
struct clk *clk;
void __iomem *regs;
};
#define to_rng_priv(rng) ((struct bcm63xx_rng_priv *)rng->priv)
#define to_rng_priv(rng) container_of(rng, struct bcm63xx_rng_priv, rng)
static int bcm63xx_rng_init(struct hwrng *rng)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
u32 val;
int error;
val = bcm_readl(priv->regs + RNG_CTRL);
error = clk_prepare_enable(priv->clk);
if (error)
return error;
val = __raw_readl(priv->regs + RNG_CTRL);
val |= RNG_EN;
bcm_writel(val, priv->regs + RNG_CTRL);
__raw_writel(val, priv->regs + RNG_CTRL);
return 0;
}
@ -40,23 +53,25 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng)
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
u32 val;
val = bcm_readl(priv->regs + RNG_CTRL);
val = __raw_readl(priv->regs + RNG_CTRL);
val &= ~RNG_EN;
bcm_writel(val, priv->regs + RNG_CTRL);
__raw_writel(val, priv->regs + RNG_CTRL);
clk_disable_unprepare(priv->clk);
}
static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
return bcm_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
}
static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
*data = bcm_readl(priv->regs + RNG_DATA);
*data = __raw_readl(priv->regs + RNG_DATA);
return 4;
}
@ -72,94 +87,53 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
dev_err(&pdev->dev, "no iomem resource\n");
ret = -ENXIO;
goto out;
return -ENXIO;
}
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
dev_err(&pdev->dev, "no memory for private structure\n");
ret = -ENOMEM;
goto out;
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->rng.name = pdev->name;
priv->rng.init = bcm63xx_rng_init;
priv->rng.cleanup = bcm63xx_rng_cleanup;
priv->rng.data_present = bcm63xx_rng_data_present;
priv->rng.data_read = bcm63xx_rng_data_read;
priv->clk = devm_clk_get(&pdev->dev, "ipsec");
if (IS_ERR(priv->clk)) {
error = PTR_ERR(priv->clk);
dev_err(&pdev->dev, "no clock for device: %d\n", error);
return error;
}
rng = kzalloc(sizeof(*rng), GFP_KERNEL);
if (!rng) {
dev_err(&pdev->dev, "no memory for rng structure\n");
ret = -ENOMEM;
goto out_free_priv;
}
platform_set_drvdata(pdev, rng);
rng->priv = (unsigned long)priv;
rng->name = pdev->name;
rng->init = bcm63xx_rng_init;
rng->cleanup = bcm63xx_rng_cleanup;
rng->data_present = bcm63xx_rng_data_present;
rng->data_read = bcm63xx_rng_data_read;
clk = clk_get(&pdev->dev, "ipsec");
if (IS_ERR(clk)) {
dev_err(&pdev->dev, "no clock for device\n");
ret = PTR_ERR(clk);
goto out_free_rng;
}
priv->clk = clk;
if (!devm_request_mem_region(&pdev->dev, r->start,
resource_size(r), pdev->name)) {
dev_err(&pdev->dev, "request mem failed");
ret = -ENOMEM;
goto out_free_rng;
return -EBUSY;
}
priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
resource_size(r));
if (!priv->regs) {
dev_err(&pdev->dev, "ioremap failed");
ret = -ENOMEM;
goto out_free_rng;
return -ENOMEM;
}
clk_enable(clk);
ret = hwrng_register(rng);
if (ret) {
dev_err(&pdev->dev, "failed to register rng device\n");
goto out_clk_disable;
error = devm_hwrng_register(&pdev->dev, &priv->rng);
if (error) {
dev_err(&pdev->dev, "failed to register rng device: %d\n",
error);
return error;
}
dev_info(&pdev->dev, "registered RNG driver\n");
return 0;
out_clk_disable:
clk_disable(clk);
out_free_rng:
kfree(rng);
out_free_priv:
kfree(priv);
out:
return ret;
}
static int bcm63xx_rng_remove(struct platform_device *pdev)
{
struct hwrng *rng = platform_get_drvdata(pdev);
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
hwrng_unregister(rng);
clk_disable(priv->clk);
kfree(priv);
kfree(rng);
return 0;
}
static struct platform_driver bcm63xx_rng_driver = {
.probe = bcm63xx_rng_probe,
.remove = bcm63xx_rng_remove,
.driver = {
.name = "bcm63xx-rng",
},

View File

@ -179,7 +179,8 @@ skip_init:
add_early_randomness(rng);
current_quality = rng->quality ? : default_quality;
current_quality &= 1023;
if (current_quality > 1024)
current_quality = 1024;
if (current_quality == 0 && hwrng_fill)
kthread_stop(hwrng_fill);
@ -536,6 +537,48 @@ void hwrng_unregister(struct hwrng *rng)
}
EXPORT_SYMBOL_GPL(hwrng_unregister);
static void devm_hwrng_release(struct device *dev, void *res)
{
hwrng_unregister(*(struct hwrng **)res);
}
static int devm_hwrng_match(struct device *dev, void *res, void *data)
{
struct hwrng **r = res;
if (WARN_ON(!r || !*r))
return 0;
return *r == data;
}
int devm_hwrng_register(struct device *dev, struct hwrng *rng)
{
struct hwrng **ptr;
int error;
ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return -ENOMEM;
error = hwrng_register(rng);
if (error) {
devres_free(ptr);
return error;
}
*ptr = rng;
devres_add(dev, ptr);
return 0;
}
EXPORT_SYMBOL_GPL(devm_hwrng_register);
void devm_hwrng_unregister(struct device *dev, struct hwrng *rng)
{
devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng);
}
EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
static int __init hwrng_modinit(void)
{
return register_miscdev();
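
The new devm_hwrng_register() above ties the hwrng's lifetime to its device, which is what allows the follow-on driver patches to drop their .remove handlers. A probe function then reduces to a sketch like this (my_rng_priv and my_rng_read are placeholder names, assumed for illustration):

#include <linux/hw_random.h>
#include <linux/platform_device.h>

struct my_rng_priv {
	struct hwrng rng;
};

static int my_rng_probe(struct platform_device *pdev)
{
	struct my_rng_priv *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->rng.name = "my-rng";
	priv->rng.read = my_rng_read;	/* driver-specific read callback */

	/* no .remove needed: the core unregisters on driver detach */
	return devm_hwrng_register(&pdev->dev, &priv->rng);
}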

View File

@ -131,16 +131,7 @@ static int exynos_rng_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_enable(&pdev->dev);
return hwrng_register(&exynos_rng->rng);
}
static int exynos_rng_remove(struct platform_device *pdev)
{
struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
hwrng_unregister(&exynos_rng->rng);
return 0;
return devm_hwrng_register(&pdev->dev, &exynos_rng->rng);
}
#ifdef CONFIG_PM
@ -172,7 +163,6 @@ static struct platform_driver exynos_rng_driver = {
.pm = &exynos_rng_pm_ops,
},
.probe = exynos_rng_probe,
.remove = exynos_rng_remove,
};
module_platform_driver(exynos_rng_driver);

View File

@ -0,0 +1,239 @@
/*
* Copyright (C) 2015 Broadcom Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation version 2.
*
* This program is distributed "as is" WITHOUT ANY WARRANTY of any
* kind, whether express or implied; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/*
* DESCRIPTION: The Broadcom iProc RNG200 Driver
*/
#include <linux/hw_random.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
/* Registers */
#define RNG_CTRL_OFFSET 0x00
#define RNG_CTRL_RNG_RBGEN_MASK 0x00001FFF
#define RNG_CTRL_RNG_RBGEN_ENABLE 0x00000001
#define RNG_CTRL_RNG_RBGEN_DISABLE 0x00000000
#define RNG_SOFT_RESET_OFFSET 0x04
#define RNG_SOFT_RESET 0x00000001
#define RBG_SOFT_RESET_OFFSET 0x08
#define RBG_SOFT_RESET 0x00000001
#define RNG_INT_STATUS_OFFSET 0x18
#define RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK 0x80000000
#define RNG_INT_STATUS_STARTUP_TRANSITIONS_MET_IRQ_MASK 0x00020000
#define RNG_INT_STATUS_NIST_FAIL_IRQ_MASK 0x00000020
#define RNG_INT_STATUS_TOTAL_BITS_COUNT_IRQ_MASK 0x00000001
#define RNG_FIFO_DATA_OFFSET 0x20
#define RNG_FIFO_COUNT_OFFSET 0x24
#define RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK 0x000000FF
struct iproc_rng200_dev {
struct hwrng rng;
void __iomem *base;
};
#define to_rng_priv(rng) container_of(rng, struct iproc_rng200_dev, rng)
static void iproc_rng200_restart(void __iomem *rng_base)
{
uint32_t val;
/* Disable RBG */
val = ioread32(rng_base + RNG_CTRL_OFFSET);
val &= ~RNG_CTRL_RNG_RBGEN_MASK;
val |= RNG_CTRL_RNG_RBGEN_DISABLE;
iowrite32(val, rng_base + RNG_CTRL_OFFSET);
/* Clear all interrupt status */
iowrite32(0xFFFFFFFFUL, rng_base + RNG_INT_STATUS_OFFSET);
/* Reset RNG and RBG */
val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
val |= RBG_SOFT_RESET;
iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
val |= RNG_SOFT_RESET;
iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
val &= ~RNG_SOFT_RESET;
iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
val &= ~RBG_SOFT_RESET;
iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
/* Enable RBG */
val = ioread32(rng_base + RNG_CTRL_OFFSET);
val &= ~RNG_CTRL_RNG_RBGEN_MASK;
val |= RNG_CTRL_RNG_RBGEN_ENABLE;
iowrite32(val, rng_base + RNG_CTRL_OFFSET);
}
static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max,
bool wait)
{
struct iproc_rng200_dev *priv = to_rng_priv(rng);
uint32_t num_remaining = max;
uint32_t status;
#define MAX_RESETS_PER_READ 1
uint32_t num_resets = 0;
#define MAX_IDLE_TIME (1 * HZ)
unsigned long idle_endtime = jiffies + MAX_IDLE_TIME;
while ((num_remaining > 0) && time_before(jiffies, idle_endtime)) {
/* Is RNG sane? If not, reset it. */
status = ioread32(priv->base + RNG_INT_STATUS_OFFSET);
if ((status & (RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK |
RNG_INT_STATUS_NIST_FAIL_IRQ_MASK)) != 0) {
if (num_resets >= MAX_RESETS_PER_READ)
return max - num_remaining;
iproc_rng200_restart(priv->base);
num_resets++;
}
/* Are there any random numbers available? */
if ((ioread32(priv->base + RNG_FIFO_COUNT_OFFSET) &
RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK) > 0) {
if (num_remaining >= sizeof(uint32_t)) {
/* Buffer has room to store entire word */
*(uint32_t *)buf = ioread32(priv->base +
RNG_FIFO_DATA_OFFSET);
buf += sizeof(uint32_t);
num_remaining -= sizeof(uint32_t);
} else {
/* Buffer can only store partial word */
uint32_t rnd_number = ioread32(priv->base +
RNG_FIFO_DATA_OFFSET);
memcpy(buf, &rnd_number, num_remaining);
buf += num_remaining;
num_remaining = 0;
}
/* Reset the IDLE timeout */
idle_endtime = jiffies + MAX_IDLE_TIME;
} else {
if (!wait)
/* Cannot wait, return immediately */
return max - num_remaining;
/* Can wait, give others chance to run */
usleep_range(min(num_remaining * 10, 500U), 500);
}
}
return max - num_remaining;
}
static int iproc_rng200_init(struct hwrng *rng)
{
struct iproc_rng200_dev *priv = to_rng_priv(rng);
uint32_t val;
/* Setup RNG. */
val = ioread32(priv->base + RNG_CTRL_OFFSET);
val &= ~RNG_CTRL_RNG_RBGEN_MASK;
val |= RNG_CTRL_RNG_RBGEN_ENABLE;
iowrite32(val, priv->base + RNG_CTRL_OFFSET);
return 0;
}
static void iproc_rng200_cleanup(struct hwrng *rng)
{
struct iproc_rng200_dev *priv = to_rng_priv(rng);
uint32_t val;
/* Disable RNG hardware */
val = ioread32(priv->base + RNG_CTRL_OFFSET);
val &= ~RNG_CTRL_RNG_RBGEN_MASK;
val |= RNG_CTRL_RNG_RBGEN_DISABLE;
iowrite32(val, priv->base + RNG_CTRL_OFFSET);
}
static int iproc_rng200_probe(struct platform_device *pdev)
{
struct iproc_rng200_dev *priv;
struct resource *res;
struct device *dev = &pdev->dev;
int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
/* Map peripheral */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(dev, "failed to get rng resources\n");
return -EINVAL;
}
priv->base = devm_ioremap_resource(dev, res);
if (IS_ERR(priv->base)) {
dev_err(dev, "failed to remap rng regs\n");
return PTR_ERR(priv->base);
}
priv->rng.name = "iproc-rng200",
priv->rng.read = iproc_rng200_read,
priv->rng.init = iproc_rng200_init,
priv->rng.cleanup = iproc_rng200_cleanup,
/* Register driver */
ret = devm_hwrng_register(dev, &priv->rng);
if (ret) {
dev_err(dev, "hwrng registration failed\n");
return ret;
}
dev_info(dev, "hwrng registered\n");
return 0;
}
static const struct of_device_id iproc_rng200_of_match[] = {
{ .compatible = "brcm,iproc-rng200", },
{},
};
MODULE_DEVICE_TABLE(of, iproc_rng200_of_match);
static struct platform_driver iproc_rng200_driver = {
.driver = {
.name = "iproc-rng200",
.of_match_table = iproc_rng200_of_match,
},
.probe = iproc_rng200_probe,
};
module_platform_driver(iproc_rng200_driver);
MODULE_AUTHOR("Broadcom");
MODULE_DESCRIPTION("iProc RNG200 Random Number Generator driver");
MODULE_LICENSE("GPL v2");

View File

@ -157,7 +157,7 @@ static int msm_rng_probe(struct platform_device *pdev)
rng->hwrng.cleanup = msm_rng_cleanup,
rng->hwrng.read = msm_rng_read,
ret = hwrng_register(&rng->hwrng);
ret = devm_hwrng_register(&pdev->dev, &rng->hwrng);
if (ret) {
dev_err(&pdev->dev, "failed to register hwrng\n");
return ret;
@ -166,14 +166,6 @@ static int msm_rng_probe(struct platform_device *pdev)
return 0;
}
static int msm_rng_remove(struct platform_device *pdev)
{
struct msm_rng *rng = platform_get_drvdata(pdev);
hwrng_unregister(&rng->hwrng);
return 0;
}
static const struct of_device_id msm_rng_of_match[] = {
{ .compatible = "qcom,prng", },
{}
@ -182,7 +174,6 @@ MODULE_DEVICE_TABLE(of, msm_rng_of_match);
static struct platform_driver msm_rng_driver = {
.probe = msm_rng_probe,
.remove = msm_rng_remove,
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = of_match_ptr(msm_rng_of_match),

View File

@ -105,7 +105,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
return 0;
}
static int __exit octeon_rng_remove(struct platform_device *pdev)
static int octeon_rng_remove(struct platform_device *pdev)
{
struct hwrng *rng = platform_get_drvdata(pdev);
@ -119,7 +119,7 @@ static struct platform_driver octeon_rng_driver = {
.name = "octeon_rng",
},
.probe = octeon_rng_probe,
.remove = __exit_p(octeon_rng_remove),
.remove = octeon_rng_remove,
};
module_platform_driver(octeon_rng_driver);

View File

@ -236,7 +236,7 @@ static int omap4_rng_init(struct omap_rng_dev *priv)
u32 val;
/* Return if RNG is already running. */
if (omap_rng_read(priv, RNG_CONFIG_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
return 0;
val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT;
@ -262,7 +262,7 @@ static void omap4_rng_cleanup(struct omap_rng_dev *priv)
val = omap_rng_read(priv, RNG_CONTROL_REG);
val &= ~RNG_CONTROL_ENABLE_TRNG_MASK;
omap_rng_write(priv, RNG_CONFIG_REG, val);
omap_rng_write(priv, RNG_CONTROL_REG, val);
}
static irqreturn_t omap4_rng_irq(int irq, void *dev_id)
@ -408,7 +408,7 @@ err_ioremap:
return ret;
}
static int __exit omap_rng_remove(struct platform_device *pdev)
static int omap_rng_remove(struct platform_device *pdev)
{
struct omap_rng_dev *priv = platform_get_drvdata(pdev);
@ -422,9 +422,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev)
return 0;
}
#ifdef CONFIG_PM_SLEEP
static int omap_rng_suspend(struct device *dev)
static int __maybe_unused omap_rng_suspend(struct device *dev)
{
struct omap_rng_dev *priv = dev_get_drvdata(dev);
@ -434,7 +432,7 @@ static int omap_rng_suspend(struct device *dev)
return 0;
}
static int omap_rng_resume(struct device *dev)
static int __maybe_unused omap_rng_resume(struct device *dev)
{
struct omap_rng_dev *priv = dev_get_drvdata(dev);
@ -445,22 +443,15 @@ static int omap_rng_resume(struct device *dev)
}
static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume);
#define OMAP_RNG_PM (&omap_rng_pm)
#else
#define OMAP_RNG_PM NULL
#endif
static struct platform_driver omap_rng_driver = {
.driver = {
.name = "omap_rng",
.pm = OMAP_RNG_PM,
.pm = &omap_rng_pm,
.of_match_table = of_match_ptr(omap_rng_of_match),
},
.probe = omap_rng_probe,
.remove = __exit_p(omap_rng_remove),
.remove = omap_rng_remove,
};
module_platform_driver(omap_rng_driver);

Some files were not shown because too many files have changed in this diff.