From 840920fa28ef54c3caa08be40253e1c9b5169d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 23 Feb 2015 09:33:42 +0200 Subject: main/openssl: add missing patches --- ...i-compat-with-no-freelist-and-regular-bui.patch | 27 + ...-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch | 88 +++ ...port-changes-from-upstream-padlock-module.patch | 200 ++++++ ...adlock-implement-sha1-sha224-sha256-accel.patch | 782 +++++++++++++++++++++ ...to-engine-autoload-padlock-dynamic-engine.patch | 32 + 5 files changed, 1129 insertions(+) create mode 100644 main/openssl/0008-maintain-abi-compat-with-no-freelist-and-regular-bui.patch create mode 100644 main/openssl/0009-crypto-hmac-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch create mode 100644 main/openssl/0010-backport-changes-from-upstream-padlock-module.patch create mode 100644 main/openssl/0011-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch create mode 100644 main/openssl/0012-crypto-engine-autoload-padlock-dynamic-engine.patch (limited to 'main/openssl') diff --git a/main/openssl/0008-maintain-abi-compat-with-no-freelist-and-regular-bui.patch b/main/openssl/0008-maintain-abi-compat-with-no-freelist-and-regular-bui.patch new file mode 100644 index 0000000000..ff3d25eff0 --- /dev/null +++ b/main/openssl/0008-maintain-abi-compat-with-no-freelist-and-regular-bui.patch @@ -0,0 +1,27 @@ +From 7457e26d3a78c7cd923242d87d04febadddea086 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Thu, 5 Feb 2015 10:06:31 +0200 +Subject: [PATCH] maintain abi compat with no-freelist and regular build + +--- + ssl/ssl.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/ssl/ssl.h b/ssl/ssl.h +index 2b0f662..636cb5d 100644 +--- a/ssl/ssl.h ++++ b/ssl/ssl.h +@@ -1113,6 +1113,10 @@ struct ssl_ctx_st { + unsigned int freelist_max_len; + struct ssl3_buf_freelist_st *wbuf_freelist; + struct ssl3_buf_freelist_st *rbuf_freelist; ++# else ++ unsigned int freelist_dummy0; ++ void *freelist_dummy1; ++ void *freelist_dummy2; + # endif + # ifndef OPENSSL_NO_SRP + SRP_CTX srp_ctx; /* ctx for SRP authentication */ +-- +2.2.2 + diff --git a/main/openssl/0009-crypto-hmac-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch b/main/openssl/0009-crypto-hmac-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch new file mode 100644 index 0000000000..ef46faa848 --- /dev/null +++ b/main/openssl/0009-crypto-hmac-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch @@ -0,0 +1,88 @@ +From 83c96cbc76604daccbc31cea9411555aea96fd6d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Thu, 5 Feb 2015 09:16:51 +0200 +Subject: [PATCH] crypto/hmac: support EVP_MD_CTX_FLAG_ONESHOT and set it + properly + +Some engines (namely VIA C7 Padlock) work only if EVP_MD_CTX_FLAG_ONESHOT +is set before final update. This is because some crypto accelerators cannot +perform non-finalizing transform of the digest. + +The usage of EVP_MD_CTX_FLAG_ONESHOT is used semantically slightly +differently here. It is set before the final EVP_DigestUpdate call, not +necessarily before EVP_DigestInit call. This will not cause any problems +though. +--- + crypto/hmac/hmac.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/crypto/hmac/hmac.c b/crypto/hmac/hmac.c +index 1fc9e2c..6f16578 100644 +--- a/crypto/hmac/hmac.c ++++ b/crypto/hmac/hmac.c +@@ -109,7 +109,8 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, + j = EVP_MD_block_size(md); + OPENSSL_assert(j <= (int)sizeof(ctx->key)); + if (j < len) { +- if (!EVP_DigestInit_ex(&ctx->md_ctx, md, impl)) ++ EVP_MD_CTX_set_flags(&ctx->md_ctx, EVP_MD_CTX_FLAG_ONESHOT); ++ if (!EVP_DigestInit_ex(&ctx->md_ctx, md, impl)) + goto err; + if (!EVP_DigestUpdate(&ctx->md_ctx, key, len)) + goto err; +@@ -129,6 +130,7 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, + if (reset) { + for (i = 0; i < HMAC_MAX_MD_CBLOCK; i++) + pad[i] = 0x36 ^ ctx->key[i]; ++ EVP_MD_CTX_clear_flags(&ctx->i_ctx, EVP_MD_CTX_FLAG_ONESHOT); + if (!EVP_DigestInit_ex(&ctx->i_ctx, md, impl)) + goto err; + if (!EVP_DigestUpdate(&ctx->i_ctx, pad, EVP_MD_block_size(md))) +@@ -136,6 +138,7 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, + + for (i = 0; i < HMAC_MAX_MD_CBLOCK; i++) + pad[i] = 0x5c ^ ctx->key[i]; ++ EVP_MD_CTX_clear_flags(&ctx->o_ctx, EVP_MD_CTX_FLAG_ONESHOT); + if (!EVP_DigestInit_ex(&ctx->o_ctx, md, impl)) + goto err; + if (!EVP_DigestUpdate(&ctx->o_ctx, pad, EVP_MD_block_size(md))) +@@ -143,6 +146,7 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, + } + if (!EVP_MD_CTX_copy_ex(&ctx->md_ctx, &ctx->i_ctx)) + goto err; ++ EVP_MD_CTX_clear_flags(&ctx->md_ctx, EVP_MD_CTX_FLAG_ONESHOT); + return 1; + err: + return 0; +@@ -177,6 +181,7 @@ int HMAC_Final(HMAC_CTX *ctx, unsigned char *md, unsigned int *len) + goto err; + if (!EVP_MD_CTX_copy_ex(&ctx->md_ctx, &ctx->o_ctx)) + goto err; ++ EVP_MD_CTX_set_flags(&ctx->md_ctx,EVP_MD_CTX_FLAG_ONESHOT); + if (!EVP_DigestUpdate(&ctx->md_ctx, buf, i)) + goto err; + if (!EVP_DigestFinal_ex(&ctx->md_ctx, md, len)) +@@ -233,8 +238,9 @@ unsigned char *HMAC(const EVP_MD *evp_md, const void *key, int key_len, + if (md == NULL) + md = m; + HMAC_CTX_init(&c); +- if (!HMAC_Init(&c, key, key_len, evp_md)) ++ if (!HMAC_Init_ex(&c, key, key_len, evp_md, NULL)) + goto err; ++ HMAC_CTX_set_flags(&c,EVP_MD_CTX_FLAG_ONESHOT); + if (!HMAC_Update(&c, d, n)) + goto err; + if (!HMAC_Final(&c, md, md_len)) +@@ -247,7 +253,7 @@ unsigned char *HMAC(const EVP_MD *evp_md, const void *key, int key_len, + + void HMAC_CTX_set_flags(HMAC_CTX *ctx, unsigned long flags) + { +- EVP_MD_CTX_set_flags(&ctx->i_ctx, flags); +- EVP_MD_CTX_set_flags(&ctx->o_ctx, flags); ++ EVP_MD_CTX_set_flags(&ctx->i_ctx, flags & ~EVP_MD_CTX_FLAG_ONESHOT); ++ EVP_MD_CTX_set_flags(&ctx->o_ctx, flags & ~EVP_MD_CTX_FLAG_ONESHOT); + EVP_MD_CTX_set_flags(&ctx->md_ctx, flags); + } +-- +2.2.2 + diff --git a/main/openssl/0010-backport-changes-from-upstream-padlock-module.patch b/main/openssl/0010-backport-changes-from-upstream-padlock-module.patch new file mode 100644 index 0000000000..f63bbcd1ce --- /dev/null +++ b/main/openssl/0010-backport-changes-from-upstream-padlock-module.patch @@ -0,0 +1,200 @@ +From ba17588a940ee712c3ef6d458adb1087f0c84521 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Thu, 5 Feb 2015 09:28:10 +0200 +Subject: [PATCH] backport changes from upstream padlock module. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Includes support for VIA Nano 64-bit mode. + +Signed-off-by: Timo Teräs +--- + engines/e_padlock.c | 142 +++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 125 insertions(+), 17 deletions(-) + +diff --git a/engines/e_padlock.c b/engines/e_padlock.c +index 2898e4c..94406cb 100644 +--- a/engines/e_padlock.c ++++ b/engines/e_padlock.c +@@ -101,7 +101,10 @@ + */ + # undef COMPILE_HW_PADLOCK + # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) +-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ ++# if (defined(__GNUC__) && __GNUC__>=2 && \ ++ (defined(__i386__) || defined(__i386) || \ ++ defined(__x86_64__) || defined(__x86_64)) \ ++ ) || \ + (defined(_MSC_VER) && defined(_M_IX86)) + # define COMPILE_HW_PADLOCK + # endif +@@ -303,6 +306,7 @@ static volatile struct padlock_cipher_data *padlock_saved_context; + * ======================================================= + */ + # if defined(__GNUC__) && __GNUC__>=2 ++# if defined(__i386__) || defined(__i386) + /* + * As for excessive "push %ebx"/"pop %ebx" found all over. + * When generating position-independent code GCC won't let +@@ -379,22 +383,6 @@ static int padlock_available(void) + return padlock_use_ace + padlock_use_rng; + } + +-# ifndef OPENSSL_NO_AES +-# ifndef AES_ASM +-/* Our own htonl()/ntohl() */ +-static inline void padlock_bswapl(AES_KEY *ks) +-{ +- size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]); +- unsigned int *key = ks->rd_key; +- +- while (i--) { +- asm volatile ("bswapl %0":"+r" (*key)); +- key++; +- } +-} +-# endif +-# endif +- + /* + * Force key reload from memory to the CPU microcode. Loading EFLAGS from the + * stack clears EFLAGS[30] which does the trick. +@@ -448,6 +436,110 @@ static inline void *name(size_t cnt, \ + : "edx", "cc", "memory"); \ + return iv; \ + } ++#endif ++ ++#elif defined(__x86_64__) || defined(__x86_64) ++ ++/* Load supported features of the CPU to see if ++ the PadLock is available. */ ++static int ++padlock_available(void) ++{ ++ char vendor_string[16]; ++ unsigned int eax, edx; ++ ++ /* Are we running on the Centaur (VIA) CPU? */ ++ eax = 0x00000000; ++ vendor_string[12] = 0; ++ asm volatile ( ++ "cpuid\n" ++ "movl %%ebx,(%1)\n" ++ "movl %%edx,4(%1)\n" ++ "movl %%ecx,8(%1)\n" ++ : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx"); ++ if (strcmp(vendor_string, "CentaurHauls") != 0) ++ return 0; ++ ++ /* Check for Centaur Extended Feature Flags presence */ ++ eax = 0xC0000000; ++ asm volatile ("cpuid" ++ : "+a"(eax) : : "rbx", "rcx", "rdx"); ++ if (eax < 0xC0000001) ++ return 0; ++ ++ /* Read the Centaur Extended Feature Flags */ ++ eax = 0xC0000001; ++ asm volatile ("cpuid" ++ : "+a"(eax), "=d"(edx) : : "rbx", "rcx"); ++ ++ /* Fill up some flags */ ++ padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); ++ padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); ++ ++ return padlock_use_ace + padlock_use_rng; ++} ++ ++/* Force key reload from memory to the CPU microcode. ++ Loading EFLAGS from the stack clears EFLAGS[30] ++ which does the trick. */ ++static inline void ++padlock_reload_key(void) ++{ ++ asm volatile ("pushfq; popfq"); ++} ++ ++#ifndef OPENSSL_NO_AES ++/* ++ * This is heuristic key context tracing. At first one ++ * believes that one should use atomic swap instructions, ++ * but it's not actually necessary. Point is that if ++ * padlock_saved_context was changed by another thread ++ * after we've read it and before we compare it with cdata, ++ * our key *shall* be reloaded upon thread context switch ++ * and we are therefore set in either case... ++ */ ++static inline void ++padlock_verify_context(struct padlock_cipher_data *cdata) ++{ ++ asm volatile ( ++ "pushfq\n" ++" btl $30,(%%rsp)\n" ++" jnc 1f\n" ++" cmpq %2,%1\n" ++" je 1f\n" ++" popfq\n" ++" subq $8,%%rsp\n" ++"1: addq $8,%%rsp\n" ++" movq %2,%0" ++ :"+m"(padlock_saved_context) ++ : "r"(padlock_saved_context), "r"(cdata) : "cc"); ++} ++ ++/* Template for padlock_xcrypt_* modes */ ++/* BIG FAT WARNING: ++ * The offsets used with 'leal' instructions ++ * describe items of the 'padlock_cipher_data' ++ * structure. ++ */ ++#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ ++static inline void *name(size_t cnt, \ ++ struct padlock_cipher_data *cdata, \ ++ void *out, const void *inp) \ ++{ void *iv; \ ++ asm volatile ( "leaq 16(%0),%%rdx\n" \ ++ " leaq 32(%0),%%rbx\n" \ ++ rep_xcrypt "\n" \ ++ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ ++ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ ++ : "rbx", "rdx", "cc", "memory"); \ ++ return iv; \ ++} ++#endif ++ ++#endif /* cpu */ ++ ++ ++# ifndef OPENSSL_NO_AES + + /* Generate all functions with appropriate opcodes */ + /* rep xcryptecb */ +@@ -458,7 +550,23 @@ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") + PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") + /* rep xcryptofb */ + PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") ++ ++# ifndef AES_ASM ++/* Our own htonl()/ntohl() */ ++static inline void padlock_bswapl(AES_KEY *ks) ++{ ++ size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]); ++ unsigned int *key = ks->rd_key; ++ ++ while (i--) { ++ asm volatile ("bswapl %0":"+r" (*key)); ++ key++; ++ } ++} ++# endif ++ + # endif ++ + /* The RNG call itself */ + static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in) + { +-- +2.2.2 + diff --git a/main/openssl/0011-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch b/main/openssl/0011-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch new file mode 100644 index 0000000000..5a2cdd633a --- /dev/null +++ b/main/openssl/0011-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch @@ -0,0 +1,782 @@ +From 728af0306505f1ff91364ac2175fb6bf5da90ec3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Thu, 5 Feb 2015 09:41:12 +0200 +Subject: [PATCH] engines/e_padlock: implement sha1/sha224/sha256 acceleration + +Limited support for VIA C7 that works only when EVP_MD_CTX_FLAG_ONESHOT +is used appropriately (as done by EVP_Digest, and my previous HMAC patch). + +Full support for VIA Nano including partial transformation and 64-bit mode. + +Benchmarks from VIA Nano 1.6GHz, done with including the previous HMAC and +apps/speed patches done. From single run, error margin of about 100-200k. + +No padlock + +type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes +sha1 20057.60k 51514.05k 99721.39k 130167.81k 142811.14k +sha256 7757.72k 16907.18k 28937.05k 35181.23k 37568.51k +hmac(sha1) 8582.53k 27644.69k 70402.30k 114602.67k 140167.85k + +With the patch + +sha1 37713.77k 114562.71k 259637.33k 379907.41k 438818.13k +sha256 34262.86k 103233.75k 232476.07k 338386.60k 389860.01k +hmac(sha1) 8424.70k 31475.11k 104036.10k 245559.30k 406667.26k +--- + engines/e_padlock.c | 663 ++++++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 614 insertions(+), 49 deletions(-) + +diff --git a/engines/e_padlock.c b/engines/e_padlock.c +index 94406cb..5e99114 100644 +--- a/engines/e_padlock.c ++++ b/engines/e_padlock.c +@@ -3,6 +3,9 @@ + * Written by Michal Ludvig + * http://www.logix.cz/michal + * ++ * SHA support by Timo Teras . Portions based on ++ * code originally written by Michal Ludvig. ++ * + * Big thanks to Andy Polyakov for a help with optimization, + * assembler fixes, port to MS Windows and a lot of other + * valuable work on this engine! +@@ -63,7 +66,9 @@ + */ + + #include ++#include + #include ++#include + + #include + #include +@@ -73,11 +78,32 @@ + #ifndef OPENSSL_NO_AES + # include + #endif ++#ifndef OPENSSL_NO_SHA ++# include ++#endif + #include + #include + + #ifndef OPENSSL_NO_HW +-# ifndef OPENSSL_NO_HW_PADLOCK ++# ifndef OPENSSL_NO_HW_PADLOCK ++ ++/* PadLock RNG is disabled by default */ ++# define PADLOCK_NO_RNG 1 ++ ++/* No ASM routines for SHA in MSC yet */ ++# ifdef _MSC_VER ++# define OPENSSL_NO_SHA ++# endif ++ ++/* 64-bit mode does not need software SHA1 as fallback, we can ++ * do all operations with padlock */ ++# if defined(__x86_64__) || defined(__x86_64) ++# define PADLOCK_NEED_FALLBACK_SHA 0 ++# else ++# define PADLOCK_NEED_FALLBACK_SHA 1 ++# endif ++ ++# define PADLOCK_MAX_FINALIZING_LENGTH 0x1FFFFFFE + + /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ + # if (OPENSSL_VERSION_NUMBER >= 0x00908000L) +@@ -151,60 +177,42 @@ void ENGINE_load_padlock(void) + static int padlock_available(void); + static int padlock_init(ENGINE *e); + ++# ifndef PADLOCK_NO_RNG + /* RNG Stuff */ + static RAND_METHOD padlock_rand; +- +-/* Cipher Stuff */ +-# ifndef OPENSSL_NO_AES +-static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, +- const int **nids, int nid); + # endif + + /* Engine names */ + static const char *padlock_id = "padlock"; + static char padlock_name[100]; + ++static int padlock_bind_helper(ENGINE *e); ++ + /* Available features */ +-static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ +-static int padlock_use_rng = 0; /* Random Number Generator */ ++enum padlock_flags { ++ PADLOCK_RNG = 0x01, ++ PADLOCK_ACE = 0x02, ++ PADLOCK_ACE2 = 0x04, ++ PADLOCK_PHE = 0x08, ++ PADLOCK_PMM = 0x10, ++ PADLOCK_NANO = 0x20, ++}; ++enum padlock_flags padlock_flags; ++ ++#define PADLOCK_HAVE_RNG (padlock_flags & PADLOCK_RNG) ++#define PADLOCK_HAVE_ACE (padlock_flags & (PADLOCK_ACE|PADLOCK_ACE2)) ++#define PADLOCK_HAVE_ACE1 (padlock_flags & PADLOCK_ACE) ++#define PADLOCK_HAVE_ACE2 (padlock_flags & PADLOCK_ACE2) ++#define PADLOCK_HAVE_PHE (padlock_flags & PADLOCK_PHE) ++#define PADLOCK_HAVE_PMM (padlock_flags & PADLOCK_PMM) ++#define PADLOCK_HAVE_NANO (padlock_flags & PADLOCK_NANO) ++ + # ifndef OPENSSL_NO_AES + static int padlock_aes_align_required = 1; + # endif + + /* ===== Engine "management" functions ===== */ + +-/* Prepare the ENGINE structure for registration */ +-static int padlock_bind_helper(ENGINE *e) +-{ +- /* Check available features */ +- padlock_available(); +- +-# if 1 /* disable RNG for now, see commentary in +- * vicinity of RNG code */ +- padlock_use_rng = 0; +-# endif +- +- /* Generate a nice engine name with available features */ +- BIO_snprintf(padlock_name, sizeof(padlock_name), +- "VIA PadLock (%s, %s)", +- padlock_use_rng ? "RNG" : "no-RNG", +- padlock_use_ace ? "ACE" : "no-ACE"); +- +- /* Register everything or return with an error */ +- if (!ENGINE_set_id(e, padlock_id) || +- !ENGINE_set_name(e, padlock_name) || +- !ENGINE_set_init_function(e, padlock_init) || +-# ifndef OPENSSL_NO_AES +- (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) || +-# endif +- (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) { +- return 0; +- } +- +- /* Everything looks good */ +- return 1; +-} +- + # ifdef OPENSSL_NO_DYNAMIC_ENGINE + + /* Constructor */ +@@ -229,7 +237,7 @@ static ENGINE *ENGINE_padlock(void) + /* Check availability of the engine */ + static int padlock_init(ENGINE *e) + { +- return (padlock_use_rng || padlock_use_ace); ++ return padlock_flags; + } + + /* +@@ -377,10 +385,20 @@ static int padlock_available(void) + "=d"(edx)::"ecx"); + + /* Fill up some flags */ +- padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6)); +- padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2)); +- +- return padlock_use_ace + padlock_use_rng; ++ padlock_flags |= ((edx & (0x3<<3)) ? PADLOCK_RNG : 0); ++ padlock_flags |= ((edx & (0x3<<7)) ? PADLOCK_ACE : 0); ++ padlock_flags |= ((edx & (0x3<<9)) ? PADLOCK_ACE2 : 0); ++ padlock_flags |= ((edx & (0x3<<11)) ? PADLOCK_PHE : 0); ++ padlock_flags |= ((edx & (0x3<<13)) ? PADLOCK_PMM : 0); ++ ++ /* Check for VIA Nano CPU */ ++ eax = 0x00000001; ++ asm volatile ("pushl %%ebx; cpuid; popl %%ebx" ++ : "+a"(eax) : : "ecx", "edx"); ++ if ((eax | 0x000F) == 0x06FF) ++ padlock_flags |= PADLOCK_NANO; ++ ++ return padlock_flags; + } + + /* +@@ -473,10 +491,14 @@ padlock_available(void) + : "+a"(eax), "=d"(edx) : : "rbx", "rcx"); + + /* Fill up some flags */ +- padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); +- padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); +- +- return padlock_use_ace + padlock_use_rng; ++ padlock_flags |= ((edx & (0x3<<3)) ? PADLOCK_RNG : 0); ++ padlock_flags |= ((edx & (0x3<<7)) ? PADLOCK_ACE : 0); ++ padlock_flags |= ((edx & (0x3<<9)) ? PADLOCK_ACE2 : 0); ++ padlock_flags |= ((edx & (0x3<<11)) ? PADLOCK_PHE : 0); ++ padlock_flags |= ((edx & (0x3<<13)) ? PADLOCK_PMM : 0); ++ padlock_flags |= PADLOCK_NANO; ++ ++ return padlock_flags; + } + + /* Force key reload from memory to the CPU microcode. +@@ -1293,6 +1315,496 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, + + # endif /* OPENSSL_NO_AES */ + ++#ifndef OPENSSL_NO_SHA ++ ++static inline void ++padlock_copy_bswap(void *dst, void *src, size_t count) ++{ ++ uint32_t *udst = dst, *usrc = src; ++ int i = 0; ++ ++ for (i = 0; i < count; i++) ++ udst[i] = htonl(usrc[i]); ++} ++ ++static unsigned long padlock_sha_prepare_padding( ++ EVP_MD_CTX *ctx, ++ unsigned char *padding, ++ unsigned char *data, size_t data_len, ++ uint64_t total) ++{ ++ unsigned int padding_len; ++ ++ padding_len = data_len < 56 ? SHA_CBLOCK : 2 * SHA_CBLOCK; ++ if (data_len) ++ memcpy(padding, data, data_len); ++ ++ memset(padding + data_len, 0, padding_len - data_len); ++ padding[data_len] = 0x80; ++ *(uint32_t *)(padding + padding_len - 8) = htonl(total >> 32); ++ *(uint32_t *)(padding + padding_len - 4) = htonl(total & 0xffffffff); ++ ++ return data_len < 56 ? 1 : 2; ++} ++ ++#define PADLOCK_SHA_ALIGN(dd) (uint32_t*)(((uintptr_t)(dd) + 15) & ~15) ++#define PADLOCK_SHA_HWCTX (128+16) ++ ++static void ++padlock_sha1(void *hwctx, const void *buf, unsigned long total, unsigned long now) ++{ ++ unsigned long pos = total - now; ++ ++ asm volatile ("xsha1" ++ : "+S"(buf), "+D"(hwctx), "+a"(pos), "+c"(total) ++ : : "memory"); ++} ++ ++static void ++padlock_sha1_partial(void *hwctx, const void *buf, unsigned long blocks) ++{ ++ asm volatile ("xsha1" ++ : "+S"(buf), "+D"(hwctx), "+c"(blocks) ++ : "a"(-1L) : "memory"); ++} ++ ++static int padlock_sha1_init(EVP_MD_CTX *ctx) ++{ ++ return SHA1_Init(ctx->md_data); ++} ++ ++#if PADLOCK_NEED_FALLBACK_SHA ++ ++static int padlock_sha1_update_eden(EVP_MD_CTX *ctx, const void *data, ++ size_t len) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ SHA_CTX *c = ctx->md_data; ++ uint_fast64_t total; ++ const unsigned char *p = data; ++ unsigned long l = 0; ++ ++ /* Calculate total length (Nl,Nh) is length in bits */ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++ total += len; ++ ++ if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) && ++ (total <= PADLOCK_MAX_FINALIZING_LENGTH)) { ++ if (c->num != 0) { ++ l = (len < SHA_CBLOCK - c->num) ? len : (SHA_CBLOCK - c->num); ++ if (!SHA1_Update(c, data, l)) ++ return 0; ++ p += l; ++ if (c->num != 0) { ++ p = (unsigned char *) c->data; ++ len = c->num; ++ l = 0; ++ } ++ } ++ memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG)); ++ padlock_sha1(aligned, p, total, len - l); ++ memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG)); ++ c->num = -1; ++ return 1; ++ } ++ ++ return SHA1_Update(c, data, len); ++} ++#endif ++ ++static int padlock_sha1_update(EVP_MD_CTX *ctx, const void *data, ++ size_t len) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ SHA_CTX *c = ctx->md_data; ++ uint_fast64_t total; ++ unsigned char *p; ++ unsigned long n; ++ ++ /* Calculate total length (Nl,Nh) is length in bits */ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++ total += len; ++ c->Nh = total >> 29; ++ c->Nl = (total << 3) & 0xffffffffUL; ++ ++ memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG)); ++ ++ /* Check partial data */ ++ n = c->num; ++ if (n) { ++ p = (unsigned char *) c->data; ++ if (len >= SHA_CBLOCK || len+n >= SHA_CBLOCK) { ++ memcpy(p+n, data, SHA_CBLOCK-n); ++ padlock_sha1_partial(aligned, p, 1); ++ n = SHA_CBLOCK - n; ++ data += n; ++ len -= n; ++ c->num = 0; ++ memset(p, 0, SHA_CBLOCK); ++ } else { ++ memcpy(p+n, data, len); ++ c->num += (unsigned int)len; ++ return 1; ++ } ++ } ++ ++ /* Can we finalize straight away? */ ++ if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) && ++ (total <= PADLOCK_MAX_FINALIZING_LENGTH)) { ++ padlock_sha1(aligned, data, total, len); ++ memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG)); ++ c->num = -1; ++ return 1; ++ } ++ ++ /* Use nonfinalizing update */ ++ n = len / SHA_CBLOCK; ++ if (n != 0) { ++ padlock_sha1_partial(aligned, data, n); ++ data += n * SHA_CBLOCK; ++ len -= n * SHA_CBLOCK; ++ } ++ memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG)); ++ ++ /* Buffer remaining bytes */ ++ if (len) { ++ memcpy(c->data, data, len); ++ c->num = len; ++ } ++ ++ return 1; ++} ++ ++static int padlock_sha1_final(EVP_MD_CTX *ctx, unsigned char *md) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ uint64_t total; ++ SHA_CTX *c = ctx->md_data; ++ ++ if (c->num == -1) { ++ padlock_copy_bswap(md, &c->h0, 5); ++ c->num = 0; ++ return 1; ++ } ++ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++#if PADLOCK_NEED_FALLBACK_SHA ++ if ((!PADLOCK_HAVE_NANO) && (total > PADLOCK_MAX_FINALIZING_LENGTH)) ++ return SHA1_Final(md, c); ++#endif ++ ++ memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG)); ++ if (total > PADLOCK_MAX_FINALIZING_LENGTH) { ++ unsigned char padding[2 * SHA_CBLOCK]; ++ unsigned long n; ++ ++ n = padlock_sha_prepare_padding(ctx, padding, ++ (unsigned char *) c->data, c->num, total << 3); ++ padlock_sha1_partial(aligned, padding, n); ++ } else { ++ padlock_sha1(aligned, c->data, total, c->num); ++ } ++ padlock_copy_bswap(md, aligned, 5); ++ c->num = 0; ++ ++ return 1; ++} ++ ++static EVP_MD padlock_sha1_md = { ++ NID_sha1, ++ NID_sha1WithRSAEncryption, ++ SHA_DIGEST_LENGTH, ++ EVP_MD_FLAG_PKEY_METHOD_SIGNATURE, ++ padlock_sha1_init, ++ padlock_sha1_update, ++ padlock_sha1_final, ++ NULL, ++ NULL, ++ EVP_PKEY_RSA_method, ++ SHA_CBLOCK, ++ sizeof(SHA_CTX), ++}; ++ ++static EVP_MD padlock_dss1_md = { ++ NID_dsa, ++ NID_dsaWithSHA1, ++ SHA_DIGEST_LENGTH, ++ 0, ++ padlock_sha1_init, ++ padlock_sha1_update, ++ padlock_sha1_final, ++ NULL, ++ NULL, ++ EVP_PKEY_DSA_method, ++ SHA_CBLOCK, ++ sizeof(SHA_CTX), ++}; ++ ++ ++#if !defined(OPENSSL_NO_SHA256) ++ ++static void ++padlock_sha256(void *hwctx, const void *buf, unsigned long total, unsigned long now) ++{ ++ unsigned long pos = total - now; ++ ++ asm volatile ("xsha256" ++ : "+S"(buf), "+D"(hwctx), "+a"(pos), "+c"(total) ++ : : "memory"); ++} ++ ++static void ++padlock_sha256_partial(void *hwctx, const void *buf, unsigned long blocks) ++{ ++ asm volatile ("xsha256" ++ : "+S"(buf), "+D"(hwctx), "+c"(blocks) ++ : "a"(-1L) : "memory"); ++} ++ ++#if PADLOCK_NEED_FALLBACK_SHA ++ ++static int padlock_sha256_update_eden(EVP_MD_CTX *ctx, const void *data, ++ size_t len) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ SHA256_CTX *c = ctx->md_data; ++ uint_fast64_t total; ++ const unsigned char *p = data; ++ unsigned int l = 0; ++ ++ /* Calculate total length (Nl,Nh) is length in bits */ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++ total += len; ++ ++ if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) && ++ (total <= PADLOCK_MAX_FINALIZING_LENGTH)) { ++ if (c->num != 0) { ++ l = (len < SHA256_CBLOCK - c->num) ? len : (SHA256_CBLOCK - c->num); ++ if (!SHA256_Update(c, data, l)) ++ return 0; ++ p += l; ++ if (c->num != 0) { ++ p = (unsigned char *) c->data; ++ len = c->num; ++ l = 0; ++ } ++ } ++ memcpy(aligned, c->h, sizeof(c->h)); ++ padlock_sha256(aligned, p, total, len - l); ++ memcpy(c->h, aligned, sizeof(c->h)); ++ c->num = -1; ++ return 1; ++ } ++ ++ return SHA256_Update(c, data, len); ++} ++ ++#endif ++ ++static int padlock_sha256_update(EVP_MD_CTX *ctx, const void *data, ++ size_t len) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ SHA256_CTX *c = ctx->md_data; ++ uint_fast64_t total; ++ unsigned char *p; ++ unsigned long n; ++ ++ /* Calculate total length (Nl,Nh) is length in bits */ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++ total += len; ++ c->Nh = total >> 29; ++ c->Nl = (total << 3) & 0xffffffffUL; ++ ++ memcpy(aligned, c->h, sizeof(c->h)); ++ ++ /* Check partial data */ ++ n = c->num; ++ if (n) { ++ p = (unsigned char *) c->data; ++ if (len >= SHA256_CBLOCK || len+n >= SHA256_CBLOCK) { ++ memcpy(p+n, data, SHA256_CBLOCK-n); ++ padlock_sha256_partial(aligned, p, 1); ++ n = SHA256_CBLOCK - n; ++ data += n; ++ len -= n; ++ c->num = 0; ++ memset(p, 0, SHA256_CBLOCK); ++ } else { ++ memcpy(p+n, data, len); ++ c->num += (unsigned int)len; ++ return 1; ++ } ++ } ++ ++ /* Can we finalize straight away? */ ++ if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) && ++ (total <= PADLOCK_MAX_FINALIZING_LENGTH)) { ++ padlock_sha256(aligned, data, total, len); ++ memcpy(c->h, aligned, sizeof(c->h)); ++ c->num = -1; ++ return 1; ++ } ++ ++ /* Use nonfinalizing update */ ++ n = len / SHA256_CBLOCK; ++ if (n != 0) { ++ padlock_sha256_partial(aligned, data, n); ++ data += n * SHA256_CBLOCK; ++ len -= n * SHA256_CBLOCK; ++ } ++ memcpy(c->h, aligned, sizeof(c->h)); ++ ++ /* Buffer remaining bytes */ ++ if (len) { ++ memcpy(c->data, data, len); ++ c->num = len; ++ } ++ ++ return 1; ++} ++ ++static int padlock_sha256_final(EVP_MD_CTX *ctx, unsigned char *md) ++{ ++ unsigned char hwctx[PADLOCK_SHA_HWCTX]; ++ uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx); ++ uint64_t total; ++ SHA256_CTX *c = ctx->md_data; ++ ++ if (c->num == -1) { ++ padlock_copy_bswap(md, c->h, sizeof(c->h)/sizeof(c->h[0])); ++ c->num = 0; ++ return 1; ++ } ++ ++ total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3); ++#if PADLOCK_NEED_FALLBACK_SHA ++ if ((!PADLOCK_HAVE_NANO) && (total > PADLOCK_MAX_FINALIZING_LENGTH)) ++ return SHA256_Final(md, c); ++#endif ++ ++ memcpy(aligned, c->h, sizeof(c->h)); ++ if (total > PADLOCK_MAX_FINALIZING_LENGTH) { ++ unsigned char padding[2 * SHA_CBLOCK]; ++ unsigned long n; ++ ++ n = padlock_sha_prepare_padding(ctx, padding, ++ (unsigned char *) c->data, c->num, total << 3); ++ padlock_sha256_partial(aligned, padding, n); ++ } else { ++ padlock_sha256(aligned, c->data, total, c->num); ++ } ++ padlock_copy_bswap(md, aligned, sizeof(c->h)/sizeof(c->h[0])); ++ c->num = 0; ++ return 1; ++} ++ ++#if !defined(OPENSSL_NO_SHA224) ++ ++static int padlock_sha224_init(EVP_MD_CTX *ctx) ++{ ++ return SHA224_Init(ctx->md_data); ++} ++ ++static EVP_MD padlock_sha224_md = { ++ NID_sha224, ++ NID_sha224WithRSAEncryption, ++ SHA224_DIGEST_LENGTH, ++ 0, ++ padlock_sha224_init, ++ padlock_sha256_update, ++ padlock_sha256_final, ++ NULL, ++ NULL, ++ EVP_PKEY_RSA_method, ++ SHA_CBLOCK, ++ sizeof(SHA256_CTX), ++}; ++#endif /* !OPENSSL_NO_SHA224 */ ++ ++static int padlock_sha256_init(EVP_MD_CTX *ctx) ++{ ++ return SHA256_Init(ctx->md_data); ++} ++ ++static EVP_MD padlock_sha256_md = { ++ NID_sha256, ++ NID_sha256WithRSAEncryption, ++ SHA256_DIGEST_LENGTH, ++ 0, ++ padlock_sha256_init, ++ padlock_sha256_update, ++ padlock_sha256_final, ++ NULL, ++ NULL, ++ EVP_PKEY_RSA_method, ++ SHA_CBLOCK, ++ sizeof(SHA256_CTX), ++}; ++#endif /* !OPENSSL_NO_SHA256 */ ++ ++static int padlock_digest_nids[] = { ++#if !defined(OPENSSL_NO_SHA) ++ NID_sha1, ++ NID_dsa, ++#endif ++#if !defined(OPENSSL_NO_SHA256) ++#if !defined(OPENSSL_NO_SHA224) ++ NID_sha224, ++#endif ++ NID_sha256, ++#endif ++}; ++ ++static int padlock_digest_nids_num = sizeof(padlock_digest_nids)/sizeof(padlock_digest_nids[0]); ++ ++static int ++padlock_digests (ENGINE *e, const EVP_MD **digest, const int **nids, int nid) ++{ ++ /* No specific digest => return a list of supported nids ... */ ++ if (!digest) { ++ *nids = padlock_digest_nids; ++ return padlock_digest_nids_num; ++ } ++ ++ /* ... or the requested "digest" otherwise */ ++ switch (nid) { ++#if !defined(OPENSSL_NO_SHA) ++ case NID_sha1: ++ *digest = &padlock_sha1_md; ++ break; ++ case NID_dsa: ++ *digest = &padlock_dss1_md; ++ break; ++#endif ++#if !defined(OPENSSL_NO_SHA256) ++#if !defined(OPENSSL_NO_SHA224) ++ case NID_sha224: ++ *digest = &padlock_sha224_md; ++ break; ++#endif /* OPENSSL_NO_SHA224 */ ++ case NID_sha256: ++ *digest = &padlock_sha256_md; ++ break; ++#endif /* OPENSSL_NO_SHA256 */ ++ default: ++ /* Sorry, we don't support this NID */ ++ *digest = NULL; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++#endif /* OPENSSL_NO_SHA */ ++ ++#ifndef PADLOCK_NO_RNG ++ + /* ===== Random Number Generator ===== */ + /* + * This code is not engaged. The reason is that it does not comply +@@ -1356,6 +1868,59 @@ static RAND_METHOD padlock_rand = { + padlock_rand_bytes, /* pseudorand */ + padlock_rand_status, /* rand status */ + }; ++#endif /* PADLOCK_NO_RNG */ ++ ++/* Prepare the ENGINE structure for registration */ ++static int ++padlock_bind_helper(ENGINE *e) ++{ ++ /* Check available features */ ++ padlock_available(); ++ ++ /* Generate a nice engine name with available features */ ++ BIO_snprintf(padlock_name, sizeof(padlock_name), ++ "VIA PadLock: %s%s%s%s%s%s", ++ padlock_flags ? "" : "not supported", ++ PADLOCK_HAVE_RNG ? "RNG " : "", ++ PADLOCK_HAVE_ACE ? (PADLOCK_HAVE_ACE2 ? "ACE2 " : "ACE ") : "", ++ PADLOCK_HAVE_PHE ? "PHE " : "", ++ PADLOCK_HAVE_PMM ? "PMM " : "", ++ PADLOCK_HAVE_NANO ? "NANO " : "" ++ ); ++ ++#if PADLOCK_NEED_FALLBACK_SHA && !defined(OPENSSL_NO_SHA) ++ if (!PADLOCK_HAVE_NANO) { ++ padlock_sha1_md.update = padlock_sha1_update_eden; ++ padlock_dss1_md.update = padlock_sha1_update_eden; ++#if !defined(OPENSSL_NO_SHA256) ++#if !defined(OPENSSL_NO_SHA224) ++ padlock_sha224_md.update = padlock_sha256_update_eden; ++#endif ++ padlock_sha256_md.update = padlock_sha256_update_eden; ++#endif ++ } ++#endif ++ ++ /* Register everything or return with an error */ ++ if (!ENGINE_set_id(e, padlock_id) || ++ !ENGINE_set_name(e, padlock_name) || ++ !ENGINE_set_init_function(e, padlock_init) ++#ifndef OPENSSL_NO_AES ++ || (PADLOCK_HAVE_ACE && !ENGINE_set_ciphers (e, padlock_ciphers)) ++#endif ++#ifndef OPENSSL_NO_SHA ++ || (PADLOCK_HAVE_PHE && !ENGINE_set_digests (e, padlock_digests)) ++#endif ++#ifndef PADLOCK_NO_RNG ++ || (PADLOCK_HAVE_RNG && !ENGINE_set_RAND (e, &padlock_rand)) ++#endif ++ ) { ++ return 0; ++ } ++ ++ /* Everything looks good */ ++ return 1; ++} + + # else /* !COMPILE_HW_PADLOCK */ + # ifndef OPENSSL_NO_DYNAMIC_ENGINE +-- +2.2.2 + diff --git a/main/openssl/0012-crypto-engine-autoload-padlock-dynamic-engine.patch b/main/openssl/0012-crypto-engine-autoload-padlock-dynamic-engine.patch new file mode 100644 index 0000000000..d0cdfb3b3a --- /dev/null +++ b/main/openssl/0012-crypto-engine-autoload-padlock-dynamic-engine.patch @@ -0,0 +1,32 @@ +From a358a1267644829144d6ad35116733ceeef46bf1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Thu, 5 Feb 2015 09:43:37 +0200 +Subject: [PATCH] crypto/engine: autoload padlock dynamic engine + +--- + crypto/engine/eng_all.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/crypto/engine/eng_all.c b/crypto/engine/eng_all.c +index 195a3a9..bd05e4c 100644 +--- a/crypto/engine/eng_all.c ++++ b/crypto/engine/eng_all.c +@@ -120,6 +120,15 @@ void ENGINE_load_builtin_engines(void) + ENGINE_load_capi(); + # endif + #endif ++#ifdef OPENSSL_NO_STATIC_ENGINE ++ { ++ ENGINE *e = ENGINE_by_id("padlock"); ++ if (e != NULL) { ++ ENGINE_add(e); ++ ENGINE_free(e); ++ } ++ } ++#endif + ENGINE_register_all_complete(); + } + +-- +2.2.2 + -- cgit v1.2.3