crypto: arm/crct10dif - revert to C code for short inputs

The SIMD routine ported from x86 used to have a special code path for inputs < 16 bytes, which got lost somewhere along the way. Instead, the current glue code aligns the input pointer to permit the NEON routine to use special versions of the vld1 instructions that assume 16 byte alignment, but this could result in inputs of less than 16 bytes to be passed in. This not only fails the new extended tests that Eric has implemented, it also results in the code reading past the end of the input, which could potentially result in crashes when dealing with less than 16 bytes of input at the end of a page which is followed by an unmapped page. So update the glue code to only invoke the NEON routine if the input is at least 16 bytes. Reported-by: Eric Biggers <ebiggers@kernel.org> Reviewed-by: Eric Biggers <ebiggers@kernel.org> Fixes: 1d481f1cd892 ("crypto: arm/crct10dif - port x86 SSE implementation to ARM") Cc: <stable@vger.kernel.org> # v4.10+ Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Ard Biesheuvel <ard.biesheuvel@linaro.org> 2019-01-27 10:16:52 +0100
committer: Herbert Xu <herbert@gondor.apana.org.au> 2019-02-01 14:44:39 +0800
commit: 62fecf295e3c48be1b5f17c440b93875b9adb4d6 (patch)
tree: c20b9243d53b06e851157955a9ada1369db3ad7d /arch/arm/crypto
parent: c19650d6ea99bcd903d3e55dd61860026c701339 (diff)
2 files changed, 13 insertions, 24 deletions
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
index ce45ba0c0687..16019b5961e7 100644
--- a/arch/arm/crypto/crct10dif-ce-core.S
+++ b/arch/arm/crypto/crct10dif-ce-core.S
@@ -124,10 +124,10 @@ ENTRY(crc_t10dif_pmull)
 	vext.8		q10, qzr, q0, #4
 
 	// receive the initial 64B data, xor the initial crc value
-	vld1.64		{q0-q1}, [arg2, :128]!
-	vld1.64		{q2-q3}, [arg2, :128]!
-	vld1.64		{q4-q5}, [arg2, :128]!
-	vld1.64		{q6-q7}, [arg2, :128]!
+	vld1.64		{q0-q1}, [arg2]!
+	vld1.64		{q2-q3}, [arg2]!
+	vld1.64		{q4-q5}, [arg2]!
+	vld1.64		{q6-q7}, [arg2]!
 CPU_LE(	vrev64.8	q0, q0			)
 CPU_LE(	vrev64.8	q1, q1			)
 CPU_LE(	vrev64.8	q2, q2			)
@@ -167,7 +167,7 @@ CPU_LE(	vrev64.8	q7, q7			)
 _fold_64_B_loop:
 
 	.macro		fold64, reg1, reg2
-	vld1.64		{q11-q12}, [arg2, :128]!
+	vld1.64		{q11-q12}, [arg2]!
 
 	vmull.p64	q8, \reg1\()h, d21
 	vmull.p64	\reg1, \reg1\()l, d20
@@ -238,7 +238,7 @@ _16B_reduction_loop:
 	vmull.p64	q7, d15, d21
 	veor.8		q7, q7, q8
 
-	vld1.64		{q0}, [arg2, :128]!
+	vld1.64		{q0}, [arg2]!
 CPU_LE(	vrev64.8	q0, q0		)
 	vswp		d0, d1
 	veor.8		q7, q7, q0
@@ -335,7 +335,7 @@ _less_than_128:
 	vmov.i8		q0, #0
 	vmov		s3, arg1_low32		// get the initial crc value
 
-	vld1.64		{q7}, [arg2, :128]!
+	vld1.64		{q7}, [arg2]!
 CPU_LE(	vrev64.8	q7, q7		)
 	vswp		d14, d15
 	veor.8		q7, q7, q0
diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c
index d428355cf38d..14c19c70a841 100644
--- a/arch/arm/crypto/crct10dif-ce-glue.c
+++ b/arch/arm/crypto/crct10dif-ce-glue.c
@@ -35,26 +35,15 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int length)
 {
 	u16 *crc = shash_desc_ctx(desc);
-	unsigned int l;
 
-	if (!may_use_simd()) {
-		*crc = crc_t10dif_generic(*crc, data, length);
+	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+		kernel_neon_begin();
+		*crc = crc_t10dif_pmull(*crc, data, length);
+		kernel_neon_end();
 	} else {
-		if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
-			l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
-				  ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
-
-			*crc = crc_t10dif_generic(*crc, data, l);
-
-			length -= l;
-			data += l;
-		}
-		if (length > 0) {
-			kernel_neon_begin();
-			*crc = crc_t10dif_pmull(*crc, data, length);
-			kernel_neon_end();
-		}
+		*crc = crc_t10dif_generic(*crc, data, length);
 	}
+
 	return 0;
 }
author	Ard Biesheuvel <ard.biesheuvel@linaro.org>	2019-01-27 10:16:52 +0100
committer	Herbert Xu <herbert@gondor.apana.org.au>	2019-02-01 14:44:39 +0800
commit	62fecf295e3c48be1b5f17c440b93875b9adb4d6 (patch)
tree	c20b9243d53b06e851157955a9ada1369db3ad7d /arch/arm/crypto
parent	c19650d6ea99bcd903d3e55dd61860026c701339 (diff)