From: "Stefan Kanthak" <stefan.kanthak@nexgo.de>
To: <linux-kernel@vger.kernel.org>, <linux-crypto@vger.kernel.org>
Cc: <tim.c.chen@linux.intel.com>, <sean.m.gulley@intel.com>
Subject: [PATCH 1/2] crypto: s(h)aving 40+ bytes off arch/x86/crypto/sha256_ni_asm.S
Date: Mon, 8 Apr 2024 11:26:52 +0200 [thread overview]
Message-ID: <5EEE09A9021540A5AAD8BFEEE915512D@H270> (raw)
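Use shorter SSE2 instructions (punpcklqdq, punpckhqdq) instead of some SSSE3/SSE4.1 ones (palignr, pblendw) and instead of pshufd with an immediate operand.
Use short (8-bit) displacements into K256 by pointing SHA256CONSTANTS at K256+8*16.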
--- -/arch/x86/crypto/sha256_ni_asm.S
+++ +/arch/x86/crypto/sha256_ni_asm.S
@@ -108,17 +108,17 @@
* Need to reorder these appropriately
* DCBA, HGFE -> ABEF, CDGH
*/
- movdqu 0*16(DIGEST_PTR), STATE0
- movdqu 1*16(DIGEST_PTR), STATE1
+ movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */
+ movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */
- pshufd $0xB1, STATE0, STATE0 /* CDAB */
- pshufd $0x1B, STATE1, STATE1 /* EFGH */
movdqa STATE0, MSGTMP4
- palignr $8, STATE1, STATE0 /* ABEF */
- pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
+ punpcklqdq STATE1, STATE0 /* FEBA */
+ punpckhqdq MSGTMP4, STATE1 /* DCHG */
+ pshufd $0x1B, STATE0, STATE0 /* ABEF */
+ pshufd $0xB1, STATE1, STATE1 /* CDGH */
movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
- lea K256(%rip), SHA256CONSTANTS
+ lea K256+8*16(%rip), SHA256CONSTANTS
.Lloop0:
/* Save hash values for addition after rounds */
@@ -129,18 +129,18 @@
movdqu 0*16(DATA_PTR), MSG
pshufb SHUF_MASK, MSG
movdqa MSG, MSGTMP0
- paddd 0*16(SHA256CONSTANTS), MSG
+ paddd -8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
/* Rounds 4-7 */
movdqu 1*16(DATA_PTR), MSG
pshufb SHUF_MASK, MSG
movdqa MSG, MSGTMP1
- paddd 1*16(SHA256CONSTANTS), MSG
+ paddd -7*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP1, MSGTMP0
@@ -148,9 +148,9 @@
movdqu 2*16(DATA_PTR), MSG
pshufb SHUF_MASK, MSG
movdqa MSG, MSGTMP2
- paddd 2*16(SHA256CONSTANTS), MSG
+ paddd -6*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP2, MSGTMP1
@@ -158,151 +158,151 @@
movdqu 3*16(DATA_PTR), MSG
pshufb SHUF_MASK, MSG
movdqa MSG, MSGTMP3
- paddd 3*16(SHA256CONSTANTS), MSG
+ paddd -5*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP3, MSGTMP4
palignr $4, MSGTMP2, MSGTMP4
paddd MSGTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 16-19 */
movdqa MSGTMP0, MSG
- paddd 4*16(SHA256CONSTANTS), MSG
+ paddd -4*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP0, MSGTMP4
palignr $4, MSGTMP3, MSGTMP4
paddd MSGTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 20-23 */
movdqa MSGTMP1, MSG
- paddd 5*16(SHA256CONSTANTS), MSG
+ paddd -3*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP1, MSGTMP4
palignr $4, MSGTMP0, MSGTMP4
paddd MSGTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 24-27 */
movdqa MSGTMP2, MSG
- paddd 6*16(SHA256CONSTANTS), MSG
+ paddd -2*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP2, MSGTMP4
palignr $4, MSGTMP1, MSGTMP4
paddd MSGTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 28-31 */
movdqa MSGTMP3, MSG
- paddd 7*16(SHA256CONSTANTS), MSG
+ paddd -1*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP3, MSGTMP4
palignr $4, MSGTMP2, MSGTMP4
paddd MSGTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 32-35 */
movdqa MSGTMP0, MSG
- paddd 8*16(SHA256CONSTANTS), MSG
+ paddd 0*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP0, MSGTMP4
palignr $4, MSGTMP3, MSGTMP4
paddd MSGTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 36-39 */
movdqa MSGTMP1, MSG
- paddd 9*16(SHA256CONSTANTS), MSG
+ paddd 1*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP1, MSGTMP4
palignr $4, MSGTMP0, MSGTMP4
paddd MSGTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 40-43 */
movdqa MSGTMP2, MSG
- paddd 10*16(SHA256CONSTANTS), MSG
+ paddd 2*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP2, MSGTMP4
palignr $4, MSGTMP1, MSGTMP4
paddd MSGTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 44-47 */
movdqa MSGTMP3, MSG
- paddd 11*16(SHA256CONSTANTS), MSG
+ paddd 3*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP3, MSGTMP4
palignr $4, MSGTMP2, MSGTMP4
paddd MSGTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 48-51 */
movdqa MSGTMP0, MSG
- paddd 12*16(SHA256CONSTANTS), MSG
+ paddd 4*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP0, MSGTMP4
palignr $4, MSGTMP3, MSGTMP4
paddd MSGTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 52-55 */
movdqa MSGTMP1, MSG
- paddd 13*16(SHA256CONSTANTS), MSG
+ paddd 5*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP1, MSGTMP4
palignr $4, MSGTMP0, MSGTMP4
paddd MSGTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
/* Rounds 56-59 */
movdqa MSGTMP2, MSG
- paddd 14*16(SHA256CONSTANTS), MSG
+ paddd 6*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
movdqa MSGTMP2, MSGTMP4
palignr $4, MSGTMP1, MSGTMP4
paddd MSGTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
/* Rounds 60-63 */
movdqa MSGTMP3, MSG
- paddd 15*16(SHA256CONSTANTS), MSG
+ paddd 7*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
/* Add current hash values with previously saved */
@@ -315,11 +315,11 @@
jne .Lloop0
/* Write hash values back in the correct order */
- pshufd $0x1B, STATE0, STATE0 /* FEBA */
- pshufd $0xB1, STATE1, STATE1 /* DCHG */
movdqa STATE0, MSGTMP4
- pblendw $0xF0, STATE1, STATE0 /* DCBA */
- palignr $8, MSGTMP4, STATE1 /* HGFE */
+ punpcklqdq STATE1, STATE0 /* GHEF */
+ punpckhqdq MSGTMP4, STATE1 /* ABCD */
+ pshufd $0xB1, STATE0, STATE0 /* HGFE */
+ pshufd $0x1B, STATE1, STATE1 /* DCBA */
- movdqu STATE0, 0*16(DIGEST_PTR)
- movdqu STATE1, 1*16(DIGEST_PTR)
+ movdqu STATE1, 0*16(DIGEST_PTR) /* DCBA: STATE1 now holds the first half */
+ movdqu STATE0, 1*16(DIGEST_PTR) /* HGFE: STATE0 now holds the second half */
next reply other threads:[~2024-04-08 9:45 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-08 9:26 Stefan Kanthak [this message]
2024-04-08 12:37 ` [PATCH 1/2] crypto: s(h)aving 40+ bytes off arch/x86/crypto/sha256_ni_asm.S Eric Biggers
[not found] ` <9088939CC5454139901CEDD97DAFB004@H270>
2024-04-08 15:18 ` Eric Biggers
2024-04-09 10:23 ` Stefan Kanthak
2024-04-09 12:32 ` Eric Biggers
2024-04-08 13:12 ` Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5EEE09A9021540A5AAD8BFEEE915512D@H270 \
--to=stefan.kanthak@nexgo.de \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=sean.m.gulley@intel.com \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).