diff --git a/arch/x86/configs/anolis_defconfig b/arch/x86/configs/anolis_defconfig index 4fb6c9d9d8b7ee24eb995521f7506237dec8ff0f..0690e5e6c97ab43d8076ed341976284e3ac5411a 100644 --- a/arch/x86/configs/anolis_defconfig +++ b/arch/x86/configs/anolis_defconfig @@ -7083,6 +7083,9 @@ CONFIG_CRYPTO_HW=y CONFIG_CRYPTO_DEV_PADLOCK=m CONFIG_CRYPTO_DEV_PADLOCK_AES=m CONFIG_CRYPTO_DEV_PADLOCK_SHA=m +CONFIG_CRYPTO_DEV_ZHAOXIN=m +CONFIG_CRYPTO_DEV_ZHAOXIN_AES=m +CONFIG_CRYPTO_DEV_ZHAOXIN_SHA=m # CONFIG_CRYPTO_DEV_ATMEL_ECC is not set # CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set CONFIG_CRYPTO_DEV_CCP=y diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c761952f0dc6df92e1ee37ad5707bb7539c2cef3..3d91550b365f84a9474032cc497647188676432a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -52,6 +52,45 @@ config CRYPTO_DEV_PADLOCK_SHA If unsure say M. The compiled module will be called padlock-sha. +config CRYPTO_DEV_ZHAOXIN + tristate "Support for Zhaoxin ACE" + depends on X86 && !UML + help + Some Zhaoxin processors come with an integrated crypto engine + (so called Zhaoxin ACE, Advanced Cryptography Engine) + that provides instructions for very fast cryptographic + operations with supported algorithms. + + The instructions are used only when the CPU supports them. + Otherwise software encryption is used. + +config CRYPTO_DEV_ZHAOXIN_AES + tristate "Zhaoxin ACE driver for AES algorithm" + depends on CRYPTO_DEV_ZHAOXIN + select CRYPTO_BLKCIPHER + select CRYPTO_AES + help + Use Zhaoxin ACE for AES algorithm. + + Available in Zhaoxin CPUs. + + If unsure say M. The compiled module will be + called zhaoxin-aes. + +config CRYPTO_DEV_ZHAOXIN_SHA + tristate "Zhaoxin ACE driver for SHA1 and SHA256 algorithms" + depends on CRYPTO_DEV_ZHAOXIN + select CRYPTO_HASH + select CRYPTO_SHA1 + select CRYPTO_SHA256 + help + Use Zhaoxin ACE for SHA1/SHA256 algorithms. + + Available in Zhaoxin processors. + + If unsure say M. The compiled module will be + called zhaoxin-sha. + config CRYPTO_DEV_GEODE tristate "Support for the Geode LX AES engine" depends on X86_32 && PCI diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index d859d6a5f3a45439c6e14bb19d6240e121c9ac62..6bdeecdf9f2f6485ba754d608a44ca2be7d86021 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -31,6 +31,8 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o +obj-$(CONFIG_CRYPTO_DEV_ZHAOXIN_AES) += zhaoxin-aes.o +obj-$(CONFIG_CRYPTO_DEV_ZHAOXIN_SHA) += zhaoxin-sha.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ obj-$(CONFIG_CRYPTO_DEV_QCOM_RNG) += qcom-rng.o diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 1be549a07a21976dc20ef45ca1ad8c3c40030ee4..f0c3127941ae2f5cea71a4041aafc3e617af2df9 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -475,7 +475,7 @@ static struct skcipher_alg cbc_aes_alg = { }; static const struct x86_cpu_id padlock_cpu_id[] = { - X86_MATCH_FEATURE(X86_FEATURE_XCRYPT, NULL), + { X86_VENDOR_CENTAUR, 6, X86_MODEL_ANY, X86_FEATURE_XCRYPT }, {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 6865c7f1fc1a2343611ed56fd561461bf5d7ec23..04858dc8b59794beabd98dbdfe59a2b1e1305a1a 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -491,7 +491,7 @@ static struct shash_alg sha256_alg_nano = { }; static const struct x86_cpu_id padlock_sha_ids[] = { - X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL), + { X86_VENDOR_CENTAUR, 6, X86_MODEL_ANY, X86_FEATURE_PHE }, {} }; MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids); diff --git a/drivers/crypto/zhaoxin-aes.c b/drivers/crypto/zhaoxin-aes.c new file mode 100644 index 0000000000000000000000000000000000000000..e1d029fa9d1ab84df36ef5e6ec9b49d9cb2308be --- /dev/null +++ b/drivers/crypto/zhaoxin-aes.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for ACE hardware crypto engine. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "1.0.0" + +/* + * Number of data blocks actually fetched for each xcrypt insn. + * Processors with prefetch errata will fetch extra blocks. + */ +static unsigned int ecb_fetch_blocks = 2; +#define MAX_ECB_FETCH_BLOCKS (8) +#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE) + +static unsigned int cbc_fetch_blocks = 1; +#define MAX_CBC_FETCH_BLOCKS (4) +#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE) + +/* Control word. */ +struct cword { + unsigned int __packed + rounds:4, + algo:3, + keygen:1, + interm:1, + encdec:1, + ksize:2; +} __aligned(PADLOCK_ALIGNMENT); + +/* + * Whenever making any changes to the following structure *make sure* you keep E, d_data and cword + * aligned on 16 Bytes boundaries and the Hardware can access 16 * 16 bytes of E and d_data (only + * the first 15 * 16 bytes matter but the HW reads more). + */ +struct aes_ctx { + u32 E[AES_MAX_KEYLENGTH_U32] __aligned(PADLOCK_ALIGNMENT); + u32 d_data[AES_MAX_KEYLENGTH_U32] __aligned(PADLOCK_ALIGNMENT); + struct { + struct cword encrypt; + struct cword decrypt; + } cword; + u32 *D; +}; + +static DEFINE_PER_CPU(struct cword *, zx_paes_last_cword); + +/* Tells whether the ACE is capable to generate the extended key for a given key_len. */ +static inline int aes_hw_extkey_available(uint8_t key_len) +{ + /* + * TODO: We should check the actual CPU model/stepping as it's possible that the + * capability will be added in the next CPU revisions. + */ + if (key_len == 16) + return 1; + return 0; +} + +static inline struct aes_ctx *aes_ctx_common(void *ctx) +{ + unsigned long addr = (unsigned long)ctx; + unsigned long align = PADLOCK_ALIGNMENT; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (struct aes_ctx *)ALIGN(addr, align); +} + +static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) +{ + return aes_ctx_common(crypto_tfm_ctx(tfm)); +} + +static inline struct aes_ctx *skcipher_aes_ctx(struct crypto_skcipher *tfm) +{ + return aes_ctx_common(crypto_skcipher_ctx(tfm)); +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) +{ + struct aes_ctx *ctx = aes_ctx(tfm); + const __le32 *key = (const __le32 *)in_key; + struct crypto_aes_ctx gen_aes; + int cpu; + + if (key_len % 8) + return -EINVAL; + + /* + * If the hardware is capable of generating the extended key itself we must supply the + * plain key for both encryption and decryption. + */ + ctx->D = ctx->E; + + ctx->E[0] = le32_to_cpu(key[0]); + ctx->E[1] = le32_to_cpu(key[1]); + ctx->E[2] = le32_to_cpu(key[2]); + ctx->E[3] = le32_to_cpu(key[3]); + + /* Prepare control words. */ + memset(&ctx->cword, 0, sizeof(ctx->cword)); + + ctx->cword.decrypt.encdec = 1; + ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4; + ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds; + ctx->cword.encrypt.ksize = (key_len - 16) / 8; + ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize; + + /* Don't generate extended keys if the hardware can do it. */ + if (aes_hw_extkey_available(key_len)) + goto ok; + + ctx->D = ctx->d_data; + ctx->cword.encrypt.keygen = 1; + ctx->cword.decrypt.keygen = 1; + + if (aes_expandkey(&gen_aes, in_key, key_len)) + return -EINVAL; + + memcpy(ctx->E, gen_aes.key_enc, AES_MAX_KEYLENGTH); + memcpy(ctx->D, gen_aes.key_dec, AES_MAX_KEYLENGTH); + +ok: + for_each_online_cpu(cpu) + if (&ctx->cword.encrypt == per_cpu(zx_paes_last_cword, cpu) || + &ctx->cword.decrypt == per_cpu(zx_paes_last_cword, cpu)) + per_cpu(zx_paes_last_cword, cpu) = NULL; + + return 0; +} + +static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len); +} + +/* ====== Encryption/decryption routines ====== */ + +/* These are the real call to PadLock. */ +static inline void padlock_reset_key(struct cword *cword) +{ + int cpu = raw_smp_processor_id(); + + if (cword != per_cpu(zx_paes_last_cword, cpu)) +#ifndef CONFIG_X86_64 + asm volatile ("pushfl; popfl"); +#else + asm volatile ("pushfq; popfq"); +#endif +} + +static inline void padlock_store_cword(struct cword *cword) +{ + per_cpu(zx_paes_last_cword, raw_smp_processor_id()) = cword; +} + +/* + * While the padlock instructions don't use FP/SSE registers, they generate a spurious DNA fault + * when CR0.TS is '1'. Fortunately, the kernel doesn't use CR0.TS. + */ +static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, + struct cword *control_word, int count) +{ + asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ + : "+S"(input), "+D"(output) + : "d"(control_word), "b"(key), "c"(count)); +} + +static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key, u8 *iv, + struct cword *control_word, int count) +{ + asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ + : "+S" (input), "+D" (output), "+a" (iv) + : "d" (control_word), "b" (key), "c" (count)); + return iv; +} + +static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key, struct cword *cword, int count) +{ + /* + * Padlock prefetches extra data so we must provide mapped input buffers. + * Assume there are at least 16 bytes of stack already in use. + */ + u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; + u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); + + memcpy(tmp, in, count * AES_BLOCK_SIZE); + rep_xcrypt_ecb(tmp, out, key, cword, count); +} + +static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key, u8 *iv, struct cword *cword, int count) +{ + /* + * Padlock prefetches extra data so we must provide mapped input buffers. + * Assume there are at least 16 bytes of stack already in use. + */ + u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; + u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); + + memcpy(tmp, in, count * AES_BLOCK_SIZE); + return rep_xcrypt_cbc(tmp, out, key, iv, cword, count); +} + +static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key, struct cword *cword, int count) +{ + /* + * Padlock in ECB mode fetches at least ecb_fetch_bytes of data. + * We could avoid some copying here but it's probably not worth it. + */ + if (unlikely(offset_in_page(in) + ecb_fetch_bytes > PAGE_SIZE)) { + ecb_crypt_copy(in, out, key, cword, count); + return; + } + + rep_xcrypt_ecb(in, out, key, cword, count); +} + +static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key, u8 *iv, struct cword *cword, + int count) +{ + /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */ + if (unlikely(offset_in_page(in) + cbc_fetch_bytes > PAGE_SIZE)) + return cbc_crypt_copy(in, out, key, iv, cword, count); + + return rep_xcrypt_cbc(in, out, key, iv, cword, count); +} + +static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, void *control_word, + u32 count) +{ + u32 initial = count & (ecb_fetch_blocks - 1); + + if (count < ecb_fetch_blocks) { + ecb_crypt(input, output, key, control_word, count); + return; + } + + count -= initial; + + if (initial) + asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ + : "+S"(input), "+D"(output) + : "d"(control_word), "b"(key), "c"(initial)); + + asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ + : "+S"(input), "+D"(output) + : "d"(control_word), "b"(key), "c"(count)); +} + +static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, u8 *iv, + void *control_word, u32 count) +{ + u32 initial = count & (cbc_fetch_blocks - 1); + + if (count < cbc_fetch_blocks) + return cbc_crypt(input, output, key, iv, control_word, count); + + count -= initial; + + if (initial) + asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ + : "+S" (input), "+D" (output), "+a" (iv) + : "d" (control_word), "b" (key), "c" (initial)); + + asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ + : "+S" (input), "+D" (output), "+a" (iv) + : "d" (control_word), "b" (key), "c" (count)); + return iv; +} + +static void padlock_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct aes_ctx *ctx = aes_ctx(tfm); + + padlock_reset_key(&ctx->cword.encrypt); + ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); + padlock_store_cword(&ctx->cword.encrypt); +} + +static void padlock_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct aes_ctx *ctx = aes_ctx(tfm); + + padlock_reset_key(&ctx->cword.encrypt); + ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); + padlock_store_cword(&ctx->cword.encrypt); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-padlock", + .cra_priority = PADLOCK_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_alignmask = PADLOCK_ALIGNMENT - 1, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = padlock_aes_encrypt, + .cia_decrypt = padlock_aes_decrypt, + } + } +}; + +static int ecb_aes_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aes_ctx *ctx = skcipher_aes_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + padlock_reset_key(&ctx->cword.encrypt); + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) != 0) { + padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, + ctx->E, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + padlock_store_cword(&ctx->cword.encrypt); + + return err; +} + +static int ecb_aes_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aes_ctx *ctx = skcipher_aes_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + padlock_reset_key(&ctx->cword.decrypt); + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) != 0) { + padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, + ctx->D, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + padlock_store_cword(&ctx->cword.encrypt); + + return err; +} + +static struct skcipher_alg ecb_aes_alg = { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-padlock", + .base.cra_priority = PADLOCK_COMPOSITE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aes_ctx), + .base.cra_alignmask = PADLOCK_ALIGNMENT - 1, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, +}; + +static int cbc_aes_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aes_ctx *ctx = skcipher_aes_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + padlock_reset_key(&ctx->cword.encrypt); + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) != 0) { + u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, + walk.dst.virt.addr, ctx->E, + walk.iv, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + memcpy(walk.iv, iv, AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + padlock_store_cword(&ctx->cword.decrypt); + + return err; +} + +static int cbc_aes_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aes_ctx *ctx = skcipher_aes_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + padlock_reset_key(&ctx->cword.encrypt); + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) != 0) { + padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, + ctx->D, walk.iv, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = skcipher_walk_done(&walk, nbytes); + } + + padlock_store_cword(&ctx->cword.encrypt); + + return err; +} + +static struct skcipher_alg cbc_aes_alg = { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-padlock", + .base.cra_priority = PADLOCK_COMPOSITE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aes_ctx), + .base.cra_alignmask = PADLOCK_ALIGNMENT - 1, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, +}; + +static const struct x86_cpu_id zhaoxin_cpu_id[] = { + { X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_XCRYPT }, + { X86_VENDOR_ZHAOXIN, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_XCRYPT }, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, zhaoxin_cpu_id); + +static int __init padlock_init(void) +{ + int ret; + + if (!x86_match_cpu(zhaoxin_cpu_id)) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_XCRYPT_EN)) { + pr_notice("ACE detected, but not enabled. Hmm, strange...\n"); + return -ENODEV; + } + + ret = crypto_register_alg(&aes_alg); + if (!!ret) + goto aes_err; + + ret = crypto_register_skcipher(&ecb_aes_alg); + if (!!ret) + goto ecb_aes_err; + + ret = crypto_register_skcipher(&cbc_aes_alg); + if (!!ret) + goto cbc_aes_err; + + pr_notice("Using ACE for AES algorithm.\n"); + +out: + return ret; + +cbc_aes_err: + crypto_unregister_skcipher(&ecb_aes_alg); +ecb_aes_err: + crypto_unregister_alg(&aes_alg); +aes_err: + pr_err("ACE AES initialization failed.\n"); + goto out; +} + +static void __exit padlock_fini(void) +{ + crypto_unregister_skcipher(&cbc_aes_alg); + crypto_unregister_skcipher(&ecb_aes_alg); + crypto_unregister_alg(&aes_alg); +} + +module_init(padlock_init); +module_exit(padlock_fini); + +MODULE_DESCRIPTION("ACE AES algorithm support"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Ludvig"); +MODULE_VERSION(DRIVER_VERSION); + +MODULE_ALIAS_CRYPTO("aes"); diff --git a/drivers/crypto/zhaoxin-sha.c b/drivers/crypto/zhaoxin-sha.c new file mode 100644 index 0000000000000000000000000000000000000000..840805f36838e5a2bf6791684e67f35d8698da90 --- /dev/null +++ b/drivers/crypto/zhaoxin-sha.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for ACE hardware crypto engine. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "1.0.0" + +static inline void padlock_output_block(uint32_t *src, uint32_t *dst, size_t count) +{ + while (count--) + *dst++ = swab32(*src++); +} + +/* + * Add two shash_alg instance for hardware-implemented multiple-parts hash + * supported by Zhaoxin Processor. + */ +static int padlock_sha1_init_zhaoxin(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static int padlock_sha1_update_zhaoxin(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial, done; + const u8 *src; + + /* The PHE require the out buffer must 128 bytes and 16-bytes aligned */ + u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __aligned(STACK_ALIGN); + u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE); + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + /* Append the bytes in state's buffer to a block to handle */ + if (partial) { + done = -partial; + memcpy(sctx->buffer + partial, data, done + SHA1_BLOCK_SIZE); + src = sctx->buffer; + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" + : "+S"(src), "+D"(dst) + : "a"((long)-1), "c"(1UL)); + done += SHA1_BLOCK_SIZE; + src = data + done; + } + + /* Process the left bytes from the input data */ + if (len - done >= SHA1_BLOCK_SIZE) { + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" + : "+S"(src), "+D"(dst) + : "a"((long)-1), "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); + done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); + src = data + done; + } + partial = 0; + } + memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE); + memcpy(sctx->buffer + partial, src, len - done); + + return 0; +} + +static int padlock_sha1_final_zhaoxin(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc); + unsigned int partial, padlen; + __be64 bits; + static const u8 padding[64] = { 0x80, }; + + bits = cpu_to_be64(state->count << 3); + + /* Pad out to 56 mod 64 */ + partial = state->count & 0x3f; + padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial); + padlock_sha1_update_zhaoxin(desc, padding, padlen); + + /* Append length field bytes */ + padlock_sha1_update_zhaoxin(desc, (const u8 *)&bits, sizeof(bits)); + + /* Swap to output */ + padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5); + + return 0; +} + +static int padlock_sha256_init_zhaoxin(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state) { + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 + }, + }; + + return 0; +} + +static int padlock_sha256_update_zhaoxin(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial, done; + const u8 *src; + + /* The PHE require the out buffer must 128 bytes and 16-bytes aligned */ + u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __aligned(STACK_ALIGN); + u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE); + + if ((partial + len) >= SHA256_BLOCK_SIZE) { + + /* Append the bytes in state's buffer to a block to handle */ + if (partial) { + done = -partial; + memcpy(sctx->buf + partial, data, done + SHA256_BLOCK_SIZE); + src = sctx->buf; + asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" + : "+S"(src), "+D"(dst) + : "a"((long)-1), "c"(1UL)); + done += SHA256_BLOCK_SIZE; + src = data + done; + } + + /* Process the left bytes from input data */ + if (len - done >= SHA256_BLOCK_SIZE) { + asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" + : "+S"(src), "+D"(dst) + : "a"((long)-1), "c"((unsigned long)((len - done) / 64))); + done += ((len - done) - (len - done) % 64); + src = data + done; + } + partial = 0; + } + memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE); + memcpy(sctx->buf + partial, src, len - done); + + return 0; +} + +static int padlock_sha256_final_zhaoxin(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *state = (struct sha256_state *)shash_desc_ctx(desc); + unsigned int partial, padlen; + __be64 bits; + static const u8 padding[64] = { 0x80, }; + + bits = cpu_to_be64(state->count << 3); + + /* Pad out to 56 mod 64 */ + partial = state->count & 0x3f; + padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial); + padlock_sha256_update_zhaoxin(desc, padding, padlen); + + /* Append length field bytes */ + padlock_sha256_update_zhaoxin(desc, (const u8 *)&bits, sizeof(bits)); + + /* Swap to output */ + padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8); + + return 0; +} + +static int padlock_sha_export_zhaoxin(struct shash_desc *desc, void *out) +{ + int statesize = crypto_shash_statesize(desc->tfm); + void *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, statesize); + return 0; +} + +static int padlock_sha_import_zhaoxin(struct shash_desc *desc, const void *in) +{ + int statesize = crypto_shash_statesize(desc->tfm); + void *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, statesize); + return 0; +} + +static struct shash_alg sha1_alg_zhaoxin = { + .digestsize = SHA1_DIGEST_SIZE, + .init = padlock_sha1_init_zhaoxin, + .update = padlock_sha1_update_zhaoxin, + .final = padlock_sha1_final_zhaoxin, + .export = padlock_sha_export_zhaoxin, + .import = padlock_sha_import_zhaoxin, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-padlock-zhaoxin", + .cra_priority = PADLOCK_CRA_PRIORITY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha256_alg_zhaoxin = { + .digestsize = SHA256_DIGEST_SIZE, + .init = padlock_sha256_init_zhaoxin, + .update = padlock_sha256_update_zhaoxin, + .final = padlock_sha256_final_zhaoxin, + .export = padlock_sha_export_zhaoxin, + .import = padlock_sha_import_zhaoxin, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-padlock-zhaoxin", + .cra_priority = PADLOCK_CRA_PRIORITY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static const struct x86_cpu_id zhaoxin_sha_ids[] = { + { X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_PHE }, + { X86_VENDOR_ZHAOXIN, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_PHE }, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, zhaoxin_sha_ids); + +static int __init padlock_init(void) +{ + int rc = -ENODEV; + struct shash_alg *sha1; + struct shash_alg *sha256; + + if (!x86_match_cpu(zhaoxin_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN)) + return -ENODEV; + + sha1 = &sha1_alg_zhaoxin; + sha256 = &sha256_alg_zhaoxin; + + rc = crypto_register_shash(sha1); + if (rc) + goto out; + + rc = crypto_register_shash(sha256); + if (rc) + goto out_unreg1; + + pr_notice("Using ACE for SHA1/SHA256 algorithms.\n"); + + return 0; + +out_unreg1: + crypto_unregister_shash(sha1); + +out: + pr_err("ACE SHA1/SHA256 initialization failed.\n"); + return rc; +} + +static void __exit padlock_fini(void) +{ + crypto_unregister_shash(&sha1_alg_zhaoxin); + crypto_unregister_shash(&sha256_alg_zhaoxin); +} + +module_init(padlock_init); +module_exit(padlock_fini); + +MODULE_DESCRIPTION("ACE SHA1/SHA256 algorithms support."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Ludvig"); +MODULE_VERSION(DRIVER_VERSION); + +MODULE_ALIAS_CRYPTO("sha1-all"); +MODULE_ALIAS_CRYPTO("sha256-all"); +MODULE_ALIAS_CRYPTO("sha1-padlock"); +MODULE_ALIAS_CRYPTO("sha256-padlock");