[PATCH] Use EVP_MAC interface for Poly1305 if supported.

Chris Rapier rapier at psc.edu
Tue Oct 25 06:23:36 AEDT 2022


New version of the patch. This time the MAC ctx is part of the
chachapoly_ctx, so it is created once with the cipher context instead of
on every call. This improved performance even more. The testbed is two
AMD Epyc 7502P hosts connected over a 10Gb/s link through a local switch.
The test is:
    ./hpnssh remote  "dd if=/dev/zero bs=1M count=20000" > /dev/null
The results are an average of 10 runs. Baseline is 604.9 MB/s and the EVP
version of poly1305 hit 733.5 MB/s. So that's just a bit more than a 21%
improvement in throughput.

Please note that these results are for hpnssh and not mainline OpenSSH.
The results for OpenSSH (9.0p1) are actually a lot more dramatic: more
than double. Baseline on the same testbed is 297 MB/s. The EVP version
clocked in at 637 MB/s. I tested interoperability against other versions
of OpenSSH and it does work. I feel like I must be doing something wrong,
but if I am, it's not obvious to me.

Chris

diff --git a/cipher-chachapoly-libcrypto.c b/cipher-chachapoly-libcrypto.c
index 719f9c843..199f2974e 100644
--- a/cipher-chachapoly-libcrypto.c
+++ b/cipher-chachapoly-libcrypto.c
@@ -37,12 +37,16 @@

  struct chachapoly_ctx {
         EVP_CIPHER_CTX *main_evp, *header_evp;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+       EVP_MAC_CTX *poly_ctx;
+#endif
  };

  struct chachapoly_ctx *
  chachapoly_new(const u_char *key, u_int keylen)
  {
         struct chachapoly_ctx *ctx;
+       EVP_MAC *mac = NULL;

         if (keylen != (32 + 32)) /* 2 x 256 bit keys */
                 return NULL;
@@ -57,6 +61,12 @@ chachapoly_new(const u_char *key, u_int keylen)
                 goto fail;
         if (EVP_CIPHER_CTX_iv_length(ctx->header_evp) != 16)
                 goto fail;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+       if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL)
+               goto fail;
+       if ((ctx->poly_ctx = EVP_MAC_CTX_new(mac)) == NULL)
+               goto fail;
+#endif
         return ctx;
   fail:
         chachapoly_free(ctx);
@@ -70,6 +80,9 @@ chachapoly_free(struct chachapoly_ctx *cpctx)
                 return;
         EVP_CIPHER_CTX_free(cpctx->main_evp);
         EVP_CIPHER_CTX_free(cpctx->header_evp);
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+       EVP_MAC_CTX_free(cpctx->poly_ctx);
+#endif
         freezero(cpctx, sizeof(*cpctx));
  }

@@ -90,6 +103,13 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int 
seqnr, u_char *dest,
         int r = SSH_ERR_INTERNAL_ERROR;
         u_char expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN];

+       /* using the EVP_MAC interface for poly1305 is significantly
+        * faster than the version bundled with OpenSSH. However,
+        * this interface is only available in OpenSSL 3.0+
+        * -cjr 10/21/2022 */
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+       size_t poly_out_len;
+#endif
         /*
          * Run ChaCha20 once to generate the Poly1305 key. The IV is the
          * packet sequence number.
@@ -104,11 +124,25 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int 
seqnr, u_char *dest,
                 goto out;
         }

+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+       /* init the MAC each time to get the new key */
+       if(!EVP_MAC_init(ctx->poly_ctx, (const u_char *)poly_key, 
POLY1305_KEYLEN, NULL)) {
+               r = SSH_ERR_LIBCRYPTO_ERROR;
+               goto out;
+       }
+#endif
+
         /* If decrypting, check tag before anything else */
         if (!do_encrypt) {
                 const u_char *tag = src + aadlen + len;
-
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+               /* EVP_MAC_update doesn't put the poly_mac into a buffer
+                * we need EVP_MAC_final for that */
+               EVP_MAC_update(ctx->poly_ctx, src, aadlen + len);
+               EVP_MAC_final(ctx->poly_ctx, expected_tag, 
&poly_out_len, (size_t)POLY1305_TAGLEN);
+#else
                 poly1305_auth(expected_tag, src, aadlen + len, poly_key);
+#endif
                 if (timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN) 
!= 0) {
                         r = SSH_ERR_MAC_INVALID;
                         goto out;
@@ -134,8 +168,13 @@ chachapoly_crypt(struct chachapoly_ctx *ctx, u_int 
seqnr, u_char *dest,

         /* If encrypting, calculate and append tag */
         if (do_encrypt) {
-               poly1305_auth(dest + aadlen + len, dest, aadlen + len,
-                   poly_key);
+#if OPENSSL_VERSION_NUMBER >= 0x30000000UL
+         EVP_MAC_update(ctx->poly_ctx, dest, aadlen + len);
+         EVP_MAC_final(ctx->poly_ctx, dest + aadlen + len, 
&poly_out_len, (size_t)POLY1305_TAGLEN);
+#else
+         poly1305_auth(dest + aadlen + len, dest, aadlen + len,
+                       poly_key);
+#endif
         }
         r = 0;
   out:

On 10/24/22 11:53 AM, Chris Rapier wrote:
> On 10/22/22 6:49 PM, Darren Tucker wrote:
>> On Sat, 22 Oct 2022 at 07:53, Chris Rapier <rapier at psc.edu> wrote:
>> [...]
>>> I normally wouldn't clutter up the code with library version specific
>>> ifdefs but it might be worth considering.
>>
>> Instead of ifdefs, you can check if the MAC init succeeded before
>> calling the EVP functions, else fall back to the existing code path.
> 
> As pointed out, the EVP_MAC interface is only available in OpenSSL 3.
> That said, for hpnssh we've been looking at extracting the necessary
> code/assembly from OpenSSL 3 and incorporating it into our code base to
> provide this functionality. Maybe. Depends on the complexity of the task.
>>> +       /* fetch the mac and create and initialize the context */
>>> +       if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL ||
>>> +           (poly_ctx = EVP_MAC_CTX_new(mac)) == NULL ||
>>
>> You're initializing the MAC context on every call to this function.
>> If you initialize the context once, cache it (say, as a static) and
>> reuse it does it go any faster?
> 
> That's a fine question and one I hope to explore today. I also noticed 
> that I'm neglecting to free the EVP_MAC and the EVP_MAC_CTX. Kind of 
> jumped the gun on that.
> 
> Chris


More information about the openssh-unix-dev mailing list