[PATCH] Use EVP_MAC interface for Poly1305 if supported.
Chris Rapier
rapier at psc.edu
Tue Oct 25 06:23:36 AEDT 2022
New version of the patch. This time the MAC ctx is part of the
chachapoly_ctx. This improved performance even more. The testbed are 2
AMD Epyc 7502Ps connected via 10Gb through a local switch. The test is
"./hpnssh remote "dd if=/dev/zero bs=1M count=20000" > /dev/null". The
results are an average of 10 runs. Baseline is 604.9MB/s and the EVP
version of poly1305 hit 733.5 MB/s. So that's just a bit more than 21%
improvement in throughput.
Please note that these results are for hpnssh and not mainline OpenSSH.
The results for OpenSSH (9.0p1) are actually a lot more dramatic.
Baseline on the same testbed is 297MB/s. The EVP version clocked in at
637MB/s. I tested compatibility against other versions of OpenSSH and it
does work. I feel like I must be doing something wrong but if I am it's
not obvious to me.
Chris
diff --git a/cipher-chachapoly-libcrypto.c b/cipher-chachapoly-libcrypto.c
index 719f9c843..199f2974e 100644
--- a/cipher-chachapoly-libcrypto.c
+++ b/cipher-chachapoly-libcrypto.c
@@ -37,12 +37,16 @@
struct chachapoly_ctx {
EVP_CIPHER_CTX *main_evp, *header_evp;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ EVP_MAC_CTX *poly_ctx;
+#endif
};
struct chachapoly_ctx *
chachapoly_new(const u_char *key, u_int keylen)
{
struct chachapoly_ctx *ctx;
+ EVP_MAC *mac = NULL;
if (keylen != (32 + 32)) /* 2 x 256 bit keys */
return NULL;
@@ -57,6 +61,12 @@ chachapoly_new(const u_char *key, u_int keylen)
goto fail;
if (EVP_CIPHER_CTX_iv_length(ctx->header_evp) != 16)
goto fail;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL)
+ goto fail;
+ if ((ctx->poly_ctx = EVP_MAC_CTX_new(mac)) == NULL)
+ goto fail;
+#endif
return ctx;
fail:
chachapoly_free(ctx);
@@ -70,6 +80,9 @@ chachapoly_free(struct chachapoly_ctx *cpctx)
return;
EVP_CIPHER_CTX_free(cpctx->main_evp);
EVP_CIPHER_CTX_free(cpctx->header_evp);
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ EVP_MAC_CTX_free(cpctx->poly_ctx);
+#endif
freezero(cpctx, sizeof(*cpctx));
}
@@ -90,6 +103,13 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int
seqnr, u_char *dest,
int r = SSH_ERR_INTERNAL_ERROR;
u_char expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN];
+ /* using the EVP_MAC interface for poly1305 is significantly
+ * faster than the version bundled with OpenSSH. However,
+ * this interface is only available in OpenSSL 3.0+
+ * -cjr 10/21/2022 */
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ size_t poly_out_len;
+#endif
/*
* Run ChaCha20 once to generate the Poly1305 key. The IV is the
* packet sequence number.
@@ -104,11 +124,25 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int
seqnr, u_char *dest,
goto out;
}
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ /* init the MAC each time to get the new key */
+ if(!EVP_MAC_init(ctx->poly_ctx, (const u_char *)poly_key,
POLY1305_KEYLEN, NULL)) {
+ r = SSH_ERR_LIBCRYPTO_ERROR;
+ goto out;
+ }
+#endif
+
/* If decrypting, check tag before anything else */
if (!do_encrypt) {
const u_char *tag = src + aadlen + len;
-
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ /* EVP_MAC_update doesn't put the poly_mac into a buffer
+ * we need EVP_MAC_final for that */
+ EVP_MAC_update(ctx->poly_ctx, src, aadlen + len);
+ EVP_MAC_final(ctx->poly_ctx, expected_tag,
&poly_out_len, (size_t)POLY1305_TAGLEN);
+#else
poly1305_auth(expected_tag, src, aadlen + len, poly_key);
+#endif
if (timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN)
!= 0) {
r = SSH_ERR_MAC_INVALID;
goto out;
@@ -134,8 +168,13 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int
seqnr, u_char *dest,
/* If encrypting, calculate and append tag */
if (do_encrypt) {
- poly1305_auth(dest + aadlen + len, dest, aadlen + len,
- poly_key);
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+ EVP_MAC_update(ctx->poly_ctx, dest, aadlen + len);
+ EVP_MAC_final(ctx->poly_ctx, dest + aadlen + len,
&poly_out_len, (size_t)POLY1305_TAGLEN);
+#else
+ poly1305_auth(dest + aadlen + len, dest, aadlen + len,
+ poly_key);
+#endif
}
r = 0;
out:
On 10/24/22 11:53 AM, Chris Rapier wrote:
> On 10/22/22 6:49 PM, Darren Tucker wrote:
>> On Sat, 22 Oct 2022 at 07:53, Chris Rapier <rapier at psc.edu> wrote:
>> [...]
>>> I normally wouldn't clutter up the code with library version specific
>>> ifdefs but it might be worth considering.
>>
>> Instead of ifdefs, you can check if the MAC init succeeded before
>> calling the EVP functions, else fall back to the existing code path.
>
> As pointed out, this is only in OSSL3. That said, for hpnssh we've been
> looking at extracting the necessary code/assembly from OSSL3 and
> incorporating it into our code base to provide this functionality.
> Maybe. Depends on the complexity of the task.
>>> + /* fetch the mac and create and initialize the context */
>>> + if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL ||
>>> + (poly_ctx = EVP_MAC_CTX_new(mac)) == NULL ||
>>
>> You're initializing the MAC context on every call to this function.
>> If you initialize the context once, cache it (say, as a static) and
>> reuse it does it go any faster?
>
> That's a fine question and one I hope to explore today. I also noticed
> that I'm neglecting to free the the EVP_MAC and the EVP_MAC_CTX. Kind of
> jumped the gun on that.
>
> Chris
More information about the openssh-unix-dev
mailing list