diff options
-rw-r--r-- | recipes/libgcrypt/0002-add-other-missing-file.patch | 881 | ||||
-rw-r--r-- | recipes/libgcrypt/libgcrypt.recipe | 2 |
2 files changed, 882 insertions, 1 deletions
diff --git a/recipes/libgcrypt/0002-add-other-missing-file.patch b/recipes/libgcrypt/0002-add-other-missing-file.patch new file mode 100644 index 00000000..fd6fc6a8 --- /dev/null +++ b/recipes/libgcrypt/0002-add-other-missing-file.patch @@ -0,0 +1,881 @@ +From f9eed7fcec0b61f5ede54dec30235eb53a538217 Mon Sep 17 00:00:00 2001 +From: Edward Hervey <edward@collabora.com> +Date: Wed Jan 8 20:15:44 2014 +0100 + + Add other missing file + +diff --git a/cipher/serpent-armv7-neon.S b/cipher/serpent-armv7-neon.S +new file mode 100644 +index 0000000..3559558 +--- /dev/null ++++ b/cipher/serpent-armv7-neon.S +@@ -0,0 +1,869 @@ ++/* serpent-armv7-neon.S - ARM/NEON assembly implementation of Serpent cipher ++ * ++ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> ++ * ++ * This file is part of Libgcrypt. ++ * ++ * Libgcrypt is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU Lesser General Public License as ++ * published by the Free Software Foundation; either version 2.1 of ++ * the License, or (at your option) any later version. ++ * ++ * Libgcrypt is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#include <config.h> ++ ++#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ ++ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ ++ defined(HAVE_GCC_INLINE_ASM_NEON) ++ ++.text ++ ++.syntax unified ++.fpu neon ++.arm ++ ++/* ARM registers */ ++#define RROUND r0 ++ ++/* NEON vector registers */ ++#define RA0 q0 ++#define RA1 q1 ++#define RA2 q2 ++#define RA3 q3 ++#define RA4 q4 ++#define RB0 q5 ++#define RB1 q6 ++#define RB2 q7 ++#define RB3 q8 ++#define RB4 q9 ++ ++#define RT0 q10 ++#define RT1 q11 ++#define RT2 q12 ++#define RT3 q13 ++ ++#define RA0d0 d0 ++#define RA0d1 d1 ++#define RA1d0 d2 ++#define RA1d1 d3 ++#define RA2d0 d4 ++#define RA2d1 d5 ++#define RA3d0 d6 ++#define RA3d1 d7 ++#define RA4d0 d8 ++#define RA4d1 d9 ++#define RB0d0 d10 ++#define RB0d1 d11 ++#define RB1d0 d12 ++#define RB1d1 d13 ++#define RB2d0 d14 ++#define RB2d1 d15 ++#define RB3d0 d16 ++#define RB3d1 d17 ++#define RB4d0 d18 ++#define RB4d1 d19 ++#define RT0d0 d20 ++#define RT0d1 d21 ++#define RT1d0 d22 ++#define RT1d1 d23 ++#define RT2d0 d24 ++#define RT2d1 d25 ++ ++/********************************************************************** ++ helper macros ++ **********************************************************************/ ++ ++#define transpose_4x4(_q0, _q1, _q2, _q3) \ ++ vtrn.32 _q0, _q1; \ ++ vtrn.32 _q2, _q3; \ ++ vswp _q0##d1, _q2##d0; \ ++ vswp _q1##d1, _q3##d0; ++ ++/********************************************************************** ++ 8-way serpent ++ **********************************************************************/ ++ ++/* ++ * These are the S-Boxes of Serpent from following research paper. ++ * ++ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, ++ * (New York, New York, USA), p. 317–329, National Institute of Standards and ++ * Technology, 2000. 
++ * ++ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf ++ * ++ */ ++#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ veor a3, a3, a0; veor b3, b3, b0; vmov a4, a1; vmov b4, b1; \ ++ vand a1, a1, a3; vand b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ ++ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ ++ veor a0, a0, a4; veor b0, b0, b4; veor a4, a4, a3; veor b4, b4, b3; \ ++ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ ++ veor a2, a2, a4; veor b2, b2, b4; vmvn a4, a4; vmvn b4, b4; \ ++ vorr a4, a4, a1; vorr b4, b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ ++ veor a1, a1, a4; veor b1, b1, b4; vorr a3, a3, a0; vorr b3, b3, b0; \ ++ veor a1, a1, a3; veor b1, b1, b3; veor a4, a3; veor b4, b3; ++ ++#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmvn a2, a2; vmvn b2, b2; vmov a4, a1; vmov b4, b1; \ ++ vorr a1, a1, a0; vorr b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ ++ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ ++ veor a1, a1, a3; veor b1, b1, b3; veor a0, a0, a4; veor b0, b0, b4; \ ++ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a3; vand b0, b0, b3; \ ++ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ ++ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \ ++ veor a2, a2, a1; veor b2, b2, b1; veor a3, a3, a0; veor b3, b3, b0; \ ++ veor a3, a3, a1; veor b3, b3, b1;\ ++ vand a2, a2, a3; vand b2, b2, b3;\ ++ veor a4, a2; veor b4, b2; ++ ++#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmvn a0, a0; vmvn b0, b0; vmvn a2, a2; vmvn b2, b2; \ ++ vmov a4, a0; vmov b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ ++ veor a2, a2, a0; veor b2, b2, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ ++ veor a3, a3, a2; veor b3, b3, b2; veor a1, a1, a0; veor b1, b1, b0; \ ++ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a1; vorr b4, b4, b1; \ ++ veor a1, a1, a3; veor b1, b1, b3; vorr a2, a2, a0; vorr b2, b2, b0; \ ++ vand a2, 
a2, a4; vand b2, b2, b4; veor a0, a0, a1; veor b0, b0, b1; \ ++ vand a1, a1, a2; vand b1, b1, b2;\ ++ veor a1, a1, a0; veor b1, b1, b0; vand a0, a0, a2; vand b0, b0, b2; \ ++ veor a0, a4; veor b0, b4; ++ ++#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a1; vmov b4, b1; veor a1, a1, a3; veor b1, b1, b3; \ ++ vand a3, a3, a1; vand b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ ++ veor a3, a3, a0; veor b3, b3, b0; vorr a0, a0, a1; vorr b0, b0, b1; \ ++ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a4; veor b0, b0, b4; \ ++ vorr a0, a0, a2; vorr b0, b0, b2; veor a1, a1, a3; veor b1, b1, b3; \ ++ veor a0, a0, a1; veor b0, b0, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ ++ veor a1, a1, a0; veor b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \ ++ veor a4, a4, a1; veor b4, b4, b1; vorr a1, a1, a0; vorr b1, b1, b0; \ ++ veor a1, a1, a0; veor b1, b1, b0;\ ++ vorr a1, a1, a4; vorr b1, b1, b4;\ ++ veor a3, a1; veor b3, b1; ++ ++#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a0; vmov b4, b0; vand a0, a0, a2; vand b0, b0, b2; \ ++ veor a0, a0, a3; veor b0, b0, b3; veor a2, a2, a1; veor b2, b2, b1; \ ++ veor a2, a2, a0; veor b2, b2, b0; vorr a3, a3, a4; vorr b3, b3, b4; \ ++ veor a3, a3, a1; veor b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \ ++ vmov a1, a3; vmov b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ ++ veor a3, a3, a0; veor b3, b3, b0; vand a0, a0, a1; vand b0, b0, b1; \ ++ veor a4, a4, a0; veor b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ ++ veor a1, a1, a4; veor b1, b1, b4; vmvn a4, a4; vmvn b4, b4; ++ ++#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ ++ vmov a4, a3; vmov b4, b3; vand a3, a3, a2; vand b3, b3, b2; \ ++ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a2; vorr b1, b1, b2; \ ++ veor a1, a1, a4; veor b1, b1, b4; vand a4, a4, a3; vand b4, b4, b3; \ ++ veor a2, a2, a3; veor b2, b2, b3; vand a4, a4, a0; vand b4, b4, b0; \ ++ veor a4, 
a4, a2; veor b4, b4, b2; vand a2, a2, a1; vand b2, b2, b1; \ ++ vorr a2, a2, a0; vorr b2, b2, b0; vmvn a3, a3; vmvn b3, b3; \ ++ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ ++ vand a0, a0, a1; vand b0, b0, b1; veor a3, a3, a4; veor b3, b3, b4; \ ++ veor a3, a0; veor b3, b0; ++ ++#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a0; vmov b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ ++ veor a3, a3, a1; veor b3, b3, b1; vand a1, a1, a4; vand b1, b1, b4; \ ++ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a3; veor b2, b2, b3; \ ++ vand a3, a3, a0; vand b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ ++ veor a3, a3, a4; veor b3, b3, b4; veor a0, a0, a1; veor b0, b0, b1; \ ++ vand a4, a4, a0; vand b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \ ++ veor a4, a4, a2; veor b4, b4, b2; vorr a1, a1, a0; vorr b1, b1, b0; \ ++ veor a1, a1, a2; veor b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ ++ vmov a2, a1; vmov b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ ++ veor a1, a0; veor b1, b0; ++ ++#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a2; vmov b4, b2; veor a2, a2, a1; veor b2, b2, b1; \ ++ veor a0, a0, a2; veor b0, b0, b2; vand a4, a4, a2; vand b4, b4, b2; \ ++ veor a4, a4, a0; veor b4, b4, b0; vand a0, a0, a1; vand b0, b0, b1; \ ++ veor a1, a1, a3; veor b1, b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \ ++ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \ ++ veor a1, a1, a4; veor b1, b1, b4; vand a3, a3, a2; vand b3, b3, b2; \ ++ veor a3, a3, a1; veor b3, b3, b1; veor a1, a1, a0; veor b1, b1, b0; \ ++ vorr a1, a1, a2; vorr b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \ ++ veor a1, a1, a4; veor b1, b1, b4;\ ++ veor a0, a1; veor b0, b1; ++ ++#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ veor a1, a1, a3; veor b1, b1, b3; vmvn a3, a3; vmvn b3, b3; \ ++ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ ++ vmov a4, a1; vmov b4, b1; vand a1, a1, a3; vand b1, 
b1, b3; \ ++ veor a1, a1, a2; veor b1, b1, b2; veor a4, a4, a3; veor b4, b4, b3; \ ++ veor a0, a0, a4; veor b0, b0, b4; vand a2, a2, a4; vand b2, b2, b4; \ ++ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a1; vand b0, b0, b1; \ ++ veor a3, a3, a0; veor b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \ ++ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ ++ veor a0, a0, a2; veor b0, b0, b2; vand a2, a2, a3; vand b2, b2, b3; \ ++ vmvn a0, a0; vmvn b0, b0; veor a4, a2; veor b4, b2; ++ ++#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a2; vmov b4, b2; vand a2, a2, a3; vand b2, b2, b3; \ ++ veor a2, a2, a1; veor b2, b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \ ++ vand a1, a1, a0; vand b1, b1, b0; veor a4, a4, a2; veor b4, b4, b2; \ ++ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ ++ vmvn a0, a0; vmvn b0, b0; veor a3, a3, a4; veor b3, b3, b4; \ ++ veor a1, a1, a3; veor b1, b1, b3; vand a3, a3, a0; vand b3, b3, b0; \ ++ veor a3, a3, a2; veor b3, b3, b2; veor a0, a0, a1; veor b0, b0, b1; \ ++ vand a2, a2, a0; vand b2, b2, b0; veor a3, a3, a0; veor b3, b3, b0; \ ++ veor a2, a2, a4; veor b2, b2, b4;\ ++ vorr a2, a2, a3; vorr b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \ ++ veor a2, a1; veor b2, b1; ++ ++#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ veor a0, a0, a1; veor b0, b0, b1; veor a1, a1, a3; veor b1, b1, b3; \ ++ vmvn a3, a3; vmvn b3, b3; vmov a4, a1; vmov b4, b1; \ ++ vand a1, a1, a0; vand b1, b1, b0; veor a2, a2, a3; veor b2, b2, b3; \ ++ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ ++ veor a4, a4, a3; veor b4, b4, b3; vand a3, a3, a1; vand b3, b3, b1; \ ++ veor a3, a3, a0; veor b3, b3, b0; veor a4, a4, a1; veor b4, b4, b1; \ ++ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a0; veor b2, b2, b0; \ ++ vand a0, a0, a3; vand b0, b0, b3; vmvn a2, a2; vmvn b2, b2; \ ++ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a3; vorr b4, b4, b3; \ ++ veor a2, a4; veor b2, 
b4; ++ ++#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmvn a1, a1; vmvn b1, b1; vmov a4, a3; vmov b4, b3; \ ++ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a0; vorr b3, b3, b0; \ ++ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \ ++ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \ ++ veor a2, a2, a4; veor b2, b2, b4; vorr a4, a4, a0; vorr b4, b4, b0; \ ++ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \ ++ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ ++ vand a3, a3, a4; vand b3, b3, b4; veor a4, a4, a1; veor b4, b4, b1; \ ++ veor a3, a3, a4; veor b3, b3, b4; vmvn a4, a4; vmvn b4, b4; \ ++ veor a3, a0; veor b3, b0; ++ ++#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmvn a2, a2; vmvn b2, b2; vmov a4, a3; vmov b4, b3; \ ++ vand a3, a3, a0; vand b3, b3, b0; veor a0, a0, a4; veor b0, b0, b4; \ ++ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a4; vorr b2, b2, b4; \ ++ veor a1, a1, a3; veor b1, b1, b3; veor a2, a2, a0; veor b2, b2, b0; \ ++ vorr a0, a0, a1; vorr b0, b0, b1; veor a2, a2, a1; veor b2, b2, b1; \ ++ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \ ++ veor a0, a0, a2; veor b0, b0, b2; veor a4, a4, a3; veor b4, b4, b3; \ ++ veor a4, a4, a0; veor b4, b4, b0; vmvn a3, a3; vmvn b3, b3; \ ++ vand a2, a2, a4; vand b2, b2, b4;\ ++ veor a2, a3; veor b2, b3; ++ ++#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ veor a0, a0, a2; veor b0, b0, b2; vmov a4, a2; vmov b4, b2; \ ++ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \ ++ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \ ++ veor a2, a2, a3; veor b2, b2, b3; vorr a4, a4, a0; vorr b4, b4, b0; \ ++ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \ ++ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a3; vand b1, b1, b3; \ ++ veor a1, a1, a0; veor b1, b1, b0; veor a0, a0, a3; veor b0, b0, b3; \ ++ vorr a0, 
a0, a2; vorr b0, b0, b2; veor a3, a3, a1; veor b3, b3, b1; \ ++ veor a4, a0; veor b4, b0; ++ ++#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a1; vmov b4, b1; vorr a1, a1, a2; vorr b1, b1, b2; \ ++ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \ ++ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a4; vorr b3, b3, b4; \ ++ vand a3, a3, a0; vand b3, b3, b0; veor a4, a4, a2; veor b4, b4, b2; \ ++ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a4; vorr b1, b1, b4; \ ++ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a4; vorr b0, b0, b4; \ ++ veor a0, a0, a2; veor b0, b0, b2; veor a1, a1, a4; veor b1, b1, b4; \ ++ veor a2, a2, a1; veor b2, b2, b1; vand a1, a1, a0; vand b1, b1, b0; \ ++ veor a1, a1, a4; veor b1, b1, b4; vmvn a2, a2; vmvn b2, b2; \ ++ vorr a2, a2, a0; vorr b2, b2, b0;\ ++ veor a4, a2; veor b4, b2; ++ ++#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vmov a4, a2; vmov b4, b2; veor a2, a2, a0; veor b2, b2, b0; \ ++ vand a0, a0, a3; vand b0, b0, b3; vorr a4, a4, a3; vorr b4, b4, b3; \ ++ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \ ++ vorr a1, a1, a0; vorr b1, b1, b0; veor a0, a0, a2; veor b0, b0, b2; \ ++ vand a2, a2, a4; vand b2, b2, b4; vand a3, a3, a4; vand b3, b3, b4; \ ++ veor a1, a1, a2; veor b1, b1, b2; veor a2, a2, a0; veor b2, b2, b0; \ ++ vorr a0, a0, a2; vorr b0, b0, b2; veor a4, a4, a1; veor b4, b4, b1; \ ++ veor a0, a0, a3; veor b0, b0, b3; veor a3, a3, a4; veor b3, b3, b4; \ ++ vorr a4, a4, a0; vorr b4, b4, b0; veor a3, a3, a2; veor b3, b3, b2; \ ++ veor a4, a2; veor b4, b2; ++ ++/* Apply SBOX number WHICH to the block. */ ++#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ SBOX##which (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) ++ ++/* Apply inverse SBOX number WHICH to the block. 
*/ ++#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ SBOX##which##_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) ++ ++/* XOR round key into block state in a0,a1,a2,a3. a4 used as temporary. */ ++#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vdup.32 RT3, RT0d0[0]; \ ++ vdup.32 RT1, RT0d0[1]; \ ++ vdup.32 RT2, RT0d1[0]; \ ++ vdup.32 RT0, RT0d1[1]; \ ++ veor a0, a0, RT3; veor b0, b0, RT3; \ ++ veor a1, a1, RT1; veor b1, b1, RT1; \ ++ veor a2, a2, RT2; veor b2, b2, RT2; \ ++ veor a3, a3, RT0; veor b3, b3, RT0; ++ ++#define BLOCK_LOAD_KEY_ENC() \ ++ vld1.8 {RT0d0, RT0d1}, [RROUND]!; ++ ++#define BLOCK_LOAD_KEY_DEC() \ ++ vld1.8 {RT0d0, RT0d1}, [RROUND]; \ ++ sub RROUND, RROUND, #16 ++ ++/* Apply the linear transformation to BLOCK. */ ++#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vshl.u32 a4, a0, #13; vshl.u32 b4, b0, #13; \ ++ vshr.u32 a0, a0, #(32-13); vshr.u32 b0, b0, #(32-13); \ ++ veor a0, a0, a4; veor b0, b0, b4; \ ++ vshl.u32 a4, a2, #3; vshl.u32 b4, b2, #3; \ ++ vshr.u32 a2, a2, #(32-3); vshr.u32 b2, b2, #(32-3); \ ++ veor a2, a2, a4; veor b2, b2, b4; \ ++ veor a1, a0, a1; veor b1, b0, b1; \ ++ veor a1, a2, a1; veor b1, b2, b1; \ ++ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ ++ veor a3, a2, a3; veor b3, b2, b3; \ ++ veor a3, a4, a3; veor b3, b4, b3; \ ++ vshl.u32 a4, a1, #1; vshl.u32 b4, b1, #1; \ ++ vshr.u32 a1, a1, #(32-1); vshr.u32 b1, b1, #(32-1); \ ++ veor a1, a1, a4; veor b1, b1, b4; \ ++ vshl.u32 a4, a3, #7; vshl.u32 b4, b3, #7; \ ++ vshr.u32 a3, a3, #(32-7); vshr.u32 b3, b3, #(32-7); \ ++ veor a3, a3, a4; veor b3, b3, b4; \ ++ veor a0, a1, a0; veor b0, b1, b0; \ ++ veor a0, a3, a0; veor b0, b3, b0; \ ++ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ ++ veor a2, a3, a2; veor b2, b3, b2; \ ++ veor a2, a4, a2; veor b2, b4, b2; \ ++ vshl.u32 a4, a0, #5; vshl.u32 b4, b0, #5; \ ++ vshr.u32 a0, a0, #(32-5); vshr.u32 b0, b0, #(32-5); \ ++ veor a0, a0, a4; veor b0, b0, b4; \ ++ vshl.u32 
a4, a2, #22; vshl.u32 b4, b2, #22; \ ++ vshr.u32 a2, a2, #(32-22); vshr.u32 b2, b2, #(32-22); \ ++ veor a2, a2, a4; veor b2, b2, b4; ++ ++/* Apply the inverse linear transformation to BLOCK. */ ++#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \ ++ vshr.u32 a4, a2, #22; vshr.u32 b4, b2, #22; \ ++ vshl.u32 a2, a2, #(32-22); vshl.u32 b2, b2, #(32-22); \ ++ veor a2, a2, a4; veor b2, b2, b4; \ ++ vshr.u32 a4, a0, #5; vshr.u32 b4, b0, #5; \ ++ vshl.u32 a0, a0, #(32-5); vshl.u32 b0, b0, #(32-5); \ ++ veor a0, a0, a4; veor b0, b0, b4; \ ++ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \ ++ veor a2, a3, a2; veor b2, b3, b2; \ ++ veor a2, a4, a2; veor b2, b4, b2; \ ++ veor a0, a1, a0; veor b0, b1, b0; \ ++ veor a0, a3, a0; veor b0, b3, b0; \ ++ vshr.u32 a4, a3, #7; vshr.u32 b4, b3, #7; \ ++ vshl.u32 a3, a3, #(32-7); vshl.u32 b3, b3, #(32-7); \ ++ veor a3, a3, a4; veor b3, b3, b4; \ ++ vshr.u32 a4, a1, #1; vshr.u32 b4, b1, #1; \ ++ vshl.u32 a1, a1, #(32-1); vshl.u32 b1, b1, #(32-1); \ ++ veor a1, a1, a4; veor b1, b1, b4; \ ++ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \ ++ veor a3, a2, a3; veor b3, b2, b3; \ ++ veor a3, a4, a3; veor b3, b4, b3; \ ++ veor a1, a0, a1; veor b1, b0, b1; \ ++ veor a1, a2, a1; veor b1, b2, b1; \ ++ vshr.u32 a4, a2, #3; vshr.u32 b4, b2, #3; \ ++ vshl.u32 a2, a2, #(32-3); vshl.u32 b2, b2, #(32-3); \ ++ veor a2, a2, a4; veor b2, b2, b4; \ ++ vshr.u32 a4, a0, #13; vshr.u32 b4, b0, #13; \ ++ vshl.u32 a0, a0, #(32-13); vshl.u32 b0, b0, #(32-13); \ ++ veor a0, a0, a4; veor b0, b0, b4; ++ ++/* Apply a Serpent round to eight parallel blocks. This macro increments ++ `round'. 
*/ ++#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ ++ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ ++ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_LOAD_KEY_ENC (); \ ++ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); ++ ++/* Apply the last Serpent round to eight parallel blocks. This macro increments ++ `round'. */ ++#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \ ++ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \ ++ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_LOAD_KEY_ENC (); \ ++ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); ++ ++/* Apply an inverse Serpent round to eight parallel blocks. This macro ++ increments `round'. */ ++#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \ ++ na0, na1, na2, na3, na4, \ ++ b0, b1, b2, b3, b4, \ ++ nb0, nb1, nb2, nb3, nb4) \ ++ LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ ++ BLOCK_LOAD_KEY_DEC (); ++ ++/* Apply the first inverse Serpent round to eight parallel blocks. This macro ++ increments `round'. 
*/ ++#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \ ++ na0, na1, na2, na3, na4, \ ++ b0, b1, b2, b3, b4, \ ++ nb0, nb1, nb2, nb3, nb4) \ ++ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_LOAD_KEY_DEC (); \ ++ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \ ++ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \ ++ BLOCK_LOAD_KEY_DEC (); ++ ++.align 3 ++.type __serpent_enc_blk8,%function; ++__serpent_enc_blk8: ++ /* input: ++ * r0: round key pointer ++ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext ++ * blocks ++ * output: ++ * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel ++ * ciphertext blocks ++ */ ++ ++ transpose_4x4(RA0, RA1, RA2, RA3); ++ BLOCK_LOAD_KEY_ENC (); ++ transpose_4x4(RB0, RB1, RB2, RB3); ++ ++ ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, ++ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); ++ ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, ++ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); ++ ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2, ++ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); ++ ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, ++ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); ++ ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, ++ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); ++ ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, ++ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); ++ ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, ++ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); ++ ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, ++ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); ++ ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0, ++ RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0); ++ ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0, ++ RB1, RB3, RB2, RB4, RB0, RB2, RB1, 
RB4, RB3, RB0); ++ ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2, ++ RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2); ++ ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4, ++ RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4); ++ ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0, ++ RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0); ++ ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0, ++ RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0); ++ ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3, ++ RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3); ++ ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0, ++ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0); ++ ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4, ++ RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4); ++ ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4, ++ RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4); ++ ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2, ++ RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2); ++ ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3, ++ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3); ++ ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4, ++ RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4); ++ ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4, ++ RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4); ++ ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0, ++ RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0); ++ ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4, ++ RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4); ++ ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3, ++ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3); ++ ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3, ++ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3); ++ ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, 
RA4, RA1, RA3, RA2, ++ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2); ++ ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0, ++ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0); ++ ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3, ++ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3); ++ ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3, ++ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3); ++ ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4, ++ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4); ++ ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3, ++ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3); ++ ++ transpose_4x4(RA4, RA1, RA2, RA0); ++ transpose_4x4(RB4, RB1, RB2, RB0); ++ ++ bx lr; ++.size __serpent_enc_blk8,.-__serpent_enc_blk8; ++ ++.align 3 ++.type __serpent_dec_blk8,%function; ++__serpent_dec_blk8: ++ /* input: ++ * r0: round key pointer ++ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel ++ * ciphertext blocks ++ * output: ++ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext ++ * blocks ++ */ ++ ++ add RROUND, RROUND, #(32*16); ++ ++ transpose_4x4(RA0, RA1, RA2, RA3); ++ BLOCK_LOAD_KEY_DEC (); ++ transpose_4x4(RB0, RB1, RB2, RB3); ++ ++ ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4, ++ RA3, RA0, RA1, RA4, RA2, ++ RB0, RB1, RB2, RB3, RB4, ++ RB3, RB0, RB1, RB4, RB2); ++ ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3, ++ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3); ++ ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0, ++ RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0); ++ ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3, ++ RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3); ++ ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3, ++ RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3); ++ ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4, 
++ RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4); ++ ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3, ++ RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3); ++ ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1, ++ RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1); ++ ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2, ++ RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2); ++ ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0, ++ RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0); ++ ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4, ++ RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4); ++ ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0, ++ RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0); ++ ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0, ++ RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0); ++ ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1, ++ RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1); ++ ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0, ++ RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0); ++ ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3, ++ RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3); ++ ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2, ++ RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2); ++ ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4, ++ RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4); ++ ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1, ++ RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1); ++ ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4, ++ RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4); ++ ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4, ++ RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4); ++ 
ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3, ++ RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3); ++ ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4, ++ RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4); ++ ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0, ++ RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0); ++ ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2, ++ RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2); ++ ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1, ++ RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1); ++ ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3, ++ RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3); ++ ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1, ++ RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1); ++ ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1, ++ RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1); ++ ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0, ++ RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0); ++ ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1, ++ RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1); ++ ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4, ++ RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4); ++ ++ transpose_4x4(RA0, RA1, RA2, RA3); ++ transpose_4x4(RB0, RB1, RB2, RB3); ++ ++ bx lr; ++.size __serpent_dec_blk8,.-__serpent_dec_blk8; ++ ++.align 3 ++.globl _gcry_serpent_neon_ctr_enc ++.type _gcry_serpent_neon_ctr_enc,%function; ++_gcry_serpent_neon_ctr_enc: ++ /* input: ++ * r0: ctx, CTX ++ * r1: dst (8 blocks) ++ * r2: src (8 blocks) ++ * r3: iv ++ */ ++ ++ vmov.u8 RT1d0, #0xff; /* u64: -1 */ ++ push {r4,lr}; ++ vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */ ++ vpush {RA4-RB2}; ++ ++ /* load IV and byteswap */ ++ vld1.8 {RA0}, [r3]; ++ vrev64.u8 RT0, RA0; /* be => le */ ++ ldr 
r4, [r3, #8]; ++ ++ /* construct IVs */ ++ vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */ ++ vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */ ++ cmp r4, #-1; ++ ++ vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */ ++ vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */ ++ ldr r4, [r3, #12]; ++ ++ vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */ ++ vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */ ++ ++ vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */ ++ vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */ ++ ++ vmov RA1d0, RT0d0; ++ vmov RA2d0, RT0d0; ++ vmov RA3d0, RT0d0; ++ vmov RB0d0, RT0d0; ++ rev r4, r4; ++ vmov RB1d0, RT0d0; ++ vmov RB2d0, RT0d0; ++ vmov RB3d0, RT0d0; ++ vmov RT2d0, RT0d0; ++ ++ /* check need for handling 64-bit overflow and carry */ ++ beq .Ldo_ctr_carry; ++ ++.Lctr_carry_done: ++ /* le => be */ ++ vrev64.u8 RA1, RA1; ++ vrev64.u8 RA2, RA2; ++ vrev64.u8 RA3, RA3; ++ vrev64.u8 RB0, RB0; ++ vrev64.u8 RT2, RT2; ++ vrev64.u8 RB1, RB1; ++ vrev64.u8 RB2, RB2; ++ vrev64.u8 RB3, RB3; ++ /* store new IV */ ++ vst1.8 {RT2}, [r3]; ++ ++ bl __serpent_enc_blk8; ++ ++ vld1.8 {RT0, RT1}, [r2]!; ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RA4, RA4, RT0; ++ veor RA1, RA1, RT1; ++ vld1.8 {RT0, RT1}, [r2]!; ++ veor RA2, RA2, RT2; ++ veor RA0, RA0, RT3; ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RB4, RB4, RT0; ++ veor RT0, RT0; ++ veor RB1, RB1, RT1; ++ veor RT1, RT1; ++ veor RB2, RB2, RT2; ++ veor RT2, RT2; ++ veor RB0, RB0, RT3; ++ veor RT3, RT3; ++ ++ vst1.8 {RA4}, [r1]!; ++ vst1.8 {RA1}, [r1]!; ++ veor RA1, RA1; ++ vst1.8 {RA2}, [r1]!; ++ veor RA2, RA2; ++ vst1.8 {RA0}, [r1]!; ++ veor RA0, RA0; ++ vst1.8 {RB4}, [r1]!; ++ veor RB4, RB4; ++ vst1.8 {RB1}, [r1]!; ++ vst1.8 {RB2}, [r1]!; ++ vst1.8 {RB0}, [r1]!; ++ ++ vpop {RA4-RB2}; ++ ++ /* clear the used registers */ ++ veor RA3, RA3; ++ veor RB3, RB3; ++ ++ pop {r4,pc}; ++ ++.Ldo_ctr_carry: ++ cmp r4, #-8; ++ blo .Lctr_carry_done; ++ beq .Lcarry_RT2; ++ ++ cmp r4, #-6; ++ blo .Lcarry_RB3; ++ beq .Lcarry_RB2; ++ ++ cmp r4, #-4; ++ blo .Lcarry_RB1; ++ beq .Lcarry_RB0; ++ ++ cmp r4, #-2; ++ blo .Lcarry_RA3; ++ 
beq .Lcarry_RA2; ++ ++ vsub.u64 RA1d0, RT1d0; /* fall-through chain: entering at any .Lcarry_* label below applies the vsub to that register's d0 half and to every register after it */ ++.Lcarry_RA2: ++ vsub.u64 RA2d0, RT1d0; ++.Lcarry_RA3: ++ vsub.u64 RA3d0, RT1d0; ++.Lcarry_RB0: ++ vsub.u64 RB0d0, RT1d0; ++.Lcarry_RB1: ++ vsub.u64 RB1d0, RT1d0; ++.Lcarry_RB2: ++ vsub.u64 RB2d0, RT1d0; ++.Lcarry_RB3: ++ vsub.u64 RB3d0, RT1d0; ++.Lcarry_RT2: ++ vsub.u64 RT2d0, RT1d0; ++ ++ b .Lctr_carry_done; ++.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc; ++ ++.align 3 ++.globl _gcry_serpent_neon_cfb_dec ++.type _gcry_serpent_neon_cfb_dec,%function; ++_gcry_serpent_neon_cfb_dec: ++ /* input: ++ * r0: ctx, CTX ++ * r1: dst (8 blocks) ++ * r2: src (8 blocks) ++ * r3: iv ++ */ /* Flow: the IV and source blocks 0-6 are passed through __serpent_enc_blk8, the eight results are XORed with source blocks 0-7 and stored to dst, and source block 7 is written back to [r3]. */ ++ ++ push {lr}; /* lr is overwritten by the bl below */ ++ vpush {RA4-RB2}; /* RA4-RB2 are q4-q7 (d8-d15), which the AAPCS makes callee-saved */ ++ ++ /* Load input: IV into RA0, then source blocks 0-6 into RA1-RA3 and RB0-RB3 */ ++ vld1.8 {RA0}, [r3]; ++ vld1.8 {RA1, RA2}, [r2]!; ++ vld1.8 {RA3}, [r2]!; ++ vld1.8 {RB0}, [r2]!; ++ vld1.8 {RB1, RB2}, [r2]!; ++ vld1.8 {RB3}, [r2]!; ++ ++ /* Update IV: source block 7 is written back to [r3] */ ++ vld1.8 {RT0}, [r2]!; ++ vst1.8 {RT0}, [r3]; ++ mov r3, lr; /* NOTE(review): r3 is not referenced again in the rest of this function; whether __serpent_enc_blk8 reads it cannot be determined from this excerpt - confirm before touching */ ++ sub r2, r2, #(8*16); /* rewind src: the post-incremented loads above advanced r2 by eight 16-byte blocks */ ++ ++ bl __serpent_enc_blk8; /* defined outside this excerpt; presumably leaves its eight output blocks in RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0, the registers XORed and stored below - TODO confirm */ ++ ++ vld1.8 {RT0, RT1}, [r2]!; /* reload the source blocks from the rewound r2 and XOR them into the cipher output; each temporary is zeroed (veor x, x) right after its last use */ ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RA4, RA4, RT0; ++ veor RA1, RA1, RT1; ++ vld1.8 {RT0, RT1}, [r2]!; ++ veor RA2, RA2, RT2; ++ veor RA0, RA0, RT3; ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RB4, RB4, RT0; ++ veor RT0, RT0; ++ veor RB1, RB1, RT1; ++ veor RT1, RT1; ++ veor RB2, RB2, RT2; ++ veor RT2, RT2; ++ veor RB0, RB0, RT3; ++ veor RT3, RT3; ++ ++ vst1.8 {RA4}, [r1]!; /* every load from src has completed before this first store to dst */ ++ vst1.8 {RA1}, [r1]!; ++ veor RA1, RA1; ++ vst1.8 {RA2}, [r1]!; ++ veor RA2, RA2; ++ vst1.8 {RA0}, [r1]!; ++ veor RA0, RA0; ++ vst1.8 {RB4}, [r1]!; ++ veor RB4, RB4; ++ vst1.8 {RB1}, [r1]!; ++ vst1.8 {RB2}, [r1]!; ++ vst1.8 {RB0}, [r1]!; ++ ++ vpop {RA4-RB2}; /* restores q4-q7, so RA4, RB0, RB1 and RB2 need no explicit clearing */ ++ ++ /* clear the used registers */ ++ veor RA3, RA3; ++ veor RB3, RB3; ++ ++ pop {pc}; /* return by popping the saved lr into pc */ ++.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec; ++ ++.align 3 ++.globl _gcry_serpent_neon_cbc_dec ++.type _gcry_serpent_neon_cbc_dec,%function; ++_gcry_serpent_neon_cbc_dec: ++ /* input: ++ * r0: ctx, CTX ++ * r1: dst (8 blocks) ++ 
* r2: src (8 blocks) ++ * r3: iv ++ */ /* Flow: eight source blocks are loaded and passed through __serpent_dec_blk8; result 0 is XORed with the IV at [r3] and result i with source block i-1, the eight blocks are stored to dst, and source block 7 is written to [r3]. */ ++ ++ push {lr}; /* lr is overwritten by the bl below */ ++ vpush {RA4-RB2}; /* RA4-RB2 are q4-q7 (d8-d15), which the AAPCS makes callee-saved */ ++ ++ vld1.8 {RA0, RA1}, [r2]!; ++ vld1.8 {RA2, RA3}, [r2]!; ++ vld1.8 {RB0, RB1}, [r2]!; ++ vld1.8 {RB2, RB3}, [r2]!; ++ sub r2, r2, #(8*16); /* rewind src: the four post-incremented 32-byte loads advanced r2 by eight 16-byte blocks */ ++ ++ bl __serpent_dec_blk8; /* defined outside this excerpt; presumably returns its eight output blocks in RA0-RA3 and RB0-RB3, the registers XORed and stored below - TODO confirm */ ++ ++ vld1.8 {RB4}, [r3]; /* RB4 = current IV */ ++ vld1.8 {RT0, RT1}, [r2]!; /* reload the source blocks from the rewound r2; each temporary is zeroed (veor x, x) right after its last use */ ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RA0, RA0, RB4; ++ veor RA1, RA1, RT0; ++ veor RA2, RA2, RT1; ++ vld1.8 {RT0, RT1}, [r2]!; ++ veor RA3, RA3, RT2; ++ veor RB0, RB0, RT3; ++ vld1.8 {RT2, RT3}, [r2]!; ++ veor RB1, RB1, RT0; ++ veor RT0, RT0; ++ veor RB2, RB2, RT1; ++ veor RT1, RT1; ++ veor RB3, RB3, RT2; ++ veor RT2, RT2; ++ vst1.8 {RT3}, [r3]; /* store new IV: RT3 holds source block 7 here */ ++ veor RT3, RT3; ++ ++ vst1.8 {RA0, RA1}, [r1]!; /* every load from src has completed before this first store to dst */ ++ veor RA0, RA0; ++ veor RA1, RA1; ++ vst1.8 {RA2, RA3}, [r1]!; ++ veor RA2, RA2; ++ vst1.8 {RB0, RB1}, [r1]!; ++ veor RA3, RA3; ++ vst1.8 {RB2, RB3}, [r1]!; ++ veor RB3, RB3; ++ ++ vpop {RA4-RB2}; /* restores q4-q7, so RA4, RB0, RB1 and RB2 need no explicit clearing */ ++ ++ /* clear the used registers */ ++ veor RB4, RB4; ++ ++ pop {pc}; /* return by popping the saved lr into pc */ ++.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec; ++ ++#endif diff --git a/recipes/libgcrypt/libgcrypt.recipe b/recipes/libgcrypt/libgcrypt.recipe index 0b6bc1aa..58a4f3bd 100644 --- a/recipes/libgcrypt/libgcrypt.recipe +++ b/recipes/libgcrypt/libgcrypt.recipe @@ -11,7 +11,7 @@ class Recipe(recipe.Recipe): url = 'ftp://ftp.gnupg.org/gcrypt/libgcrypt/libgcrypt-%(version)s.tar.bz2' autoreconf = True configure_options = ' --with-gpg-error-prefix=$CERBERO_PREFIX' - patches = ['0001-Add-missing-file-in-distribution-tarball.patch'] + patches = ['0001-Add-missing-file-in-distribution-tarball.patch', '0002-add-other-missing-file.patch'] deps = ['libgpg-error'] files_libs = ['libgcrypt'] |