summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorArd Biesheuvel <ardb@kernel.org>2023-04-12 13:00:24 +0200
committerHerbert Xu <herbert@gondor.apana.org.au>2023-04-20 18:20:04 +0800
commitc75962f1c439de1590b038cb18466a859d59f209 (patch)
tree00c46c9ce6e1b497da25679487d82bf90fcabcd8 /arch
parent9d5aef1222337f593e52293bb94c5cf7139d4d83 (diff)
crypto: x86/aesni - Use RIP-relative addressing
Prefer RIP-relative addressing where possible, which removes the need for boot time relocation fixups. In the GCM case, we can get rid of the oversized permutation array entirely while at it. Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S2
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S36
2 files changed, 8 insertions, 30 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 837c1e0aa021..ca99a2274d55 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2717,7 +2717,7 @@ SYM_FUNC_END(aesni_cts_cbc_dec)
* BSWAP_MASK == endian swapping mask
*/
SYM_FUNC_START_LOCAL(_aesni_inc_init)
- movaps .Lbswap_mask, BSWAP_MASK
+ movaps .Lbswap_mask(%rip), BSWAP_MASK
movaps IV, CTR
pshufb BSWAP_MASK, CTR
mov $1, TCTR_LOW
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 0852ab573fd3..b6ca80f188ff 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -154,30 +154,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.octa 0x00000000000000000000000000000000
-.section .rodata
-.align 16
-.type aad_shift_arr, @object
-.size aad_shift_arr, 272
-aad_shift_arr:
- .octa 0xffffffffffffffffffffffffffffffff
- .octa 0xffffffffffffffffffffffffffffff0C
- .octa 0xffffffffffffffffffffffffffff0D0C
- .octa 0xffffffffffffffffffffffffff0E0D0C
- .octa 0xffffffffffffffffffffffff0F0E0D0C
- .octa 0xffffffffffffffffffffff0C0B0A0908
- .octa 0xffffffffffffffffffff0D0C0B0A0908
- .octa 0xffffffffffffffffff0E0D0C0B0A0908
- .octa 0xffffffffffffffff0F0E0D0C0B0A0908
- .octa 0xffffffffffffff0C0B0A090807060504
- .octa 0xffffffffffff0D0C0B0A090807060504
- .octa 0xffffffffff0E0D0C0B0A090807060504
- .octa 0xffffffff0F0E0D0C0B0A090807060504
- .octa 0xffffff0C0B0A09080706050403020100
- .octa 0xffff0D0C0B0A09080706050403020100
- .octa 0xff0E0D0C0B0A09080706050403020100
- .octa 0x0F0E0D0C0B0A09080706050403020100
-
-
.text
@@ -646,11 +622,13 @@ _get_AAD_rest4\@:
_get_AAD_rest0\@:
/* finalize: shift out the extra bytes we read, and align
left. since pslldq can only shift by an immediate, we use
- vpshufb and an array of shuffle masks */
- movq %r12, %r11
- salq $4, %r11
- vmovdqu aad_shift_arr(%r11), \T1
- vpshufb \T1, \T7, \T7
+ vpshufb and a pair of shuffle masks */
+ leaq ALL_F(%rip), %r11
+ subq %r12, %r11
+ vmovdqu 16(%r11), \T1
+ andq $~3, %r11
+ vpshufb (%r11), \T7, \T7
+ vpand \T1, \T7, \T7
_get_AAD_rest_final\@:
vpshufb SHUF_MASK(%rip), \T7, \T7
vpxor \T8, \T7, \T7