mirror of https://github.com/bitcoin/bitcoin.git (synced 2025-02-02 09:46:52 -05:00)

commit 001f176: ARM assembly implementation of field_10x26 inner (Wladimir J. van der Laan)
This commit is contained in: commit 7b0fb18b75
7 changed files with 968 additions and 11 deletions
Makefile.am (23 changed lines)
@@ -52,12 +52,25 @@ noinst_HEADERS += contrib/lax_der_parsing.c
noinst_HEADERS += contrib/lax_der_privatekey_parsing.h
noinst_HEADERS += contrib/lax_der_privatekey_parsing.c

if USE_EXTERNAL_ASM
COMMON_LIB = libsecp256k1_common.la
noinst_LTLIBRARIES = $(COMMON_LIB)
else
COMMON_LIB =
endif

pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libsecp256k1.pc

if USE_EXTERNAL_ASM
if USE_ASM_ARM
libsecp256k1_common_la_SOURCES = src/asm/field_10x26_arm.s
endif
endif

libsecp256k1_la_SOURCES = src/secp256k1.c
libsecp256k1_la_CPPFLAGS = -DSECP256K1_BUILD -I$(top_srcdir)/include -I$(top_srcdir)/src $(SECP_INCLUDES)
-libsecp256k1_la_LIBADD = $(JNI_LIB) $(SECP_LIBS)
+libsecp256k1_la_LIBADD = $(JNI_LIB) $(SECP_LIBS) $(COMMON_LIB)

libsecp256k1_jni_la_SOURCES = src/java/org_bitcoin_NativeSecp256k1.c src/java/org_bitcoin_Secp256k1Context.c
libsecp256k1_jni_la_CPPFLAGS = -DSECP256K1_BUILD $(JNI_INCLUDES)

@@ -66,11 +79,11 @@ noinst_PROGRAMS =
if USE_BENCHMARK
noinst_PROGRAMS += bench_verify bench_sign bench_internal
bench_verify_SOURCES = src/bench_verify.c
-bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS)
+bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
bench_sign_SOURCES = src/bench_sign.c
-bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS)
+bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
bench_internal_SOURCES = src/bench_internal.c
-bench_internal_LDADD = $(SECP_LIBS)
+bench_internal_LDADD = $(SECP_LIBS) $(COMMON_LIB)
bench_internal_CPPFLAGS = $(SECP_INCLUDES)
endif

@@ -78,7 +91,7 @@ if USE_TESTS
noinst_PROGRAMS += tests
tests_SOURCES = src/tests.c
tests_CPPFLAGS = -DSECP256K1_BUILD -DVERIFY -I$(top_srcdir)/src -I$(top_srcdir)/include $(SECP_INCLUDES) $(SECP_TEST_INCLUDES)
-tests_LDADD = $(SECP_LIBS) $(SECP_TEST_LIBS)
+tests_LDADD = $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
tests_LDFLAGS = -static
TESTS = tests
endif
configure.ac (21 changed lines)
@@ -29,6 +29,7 @@ AC_PROG_CC_C89
if test x"$ac_cv_prog_cc_c89" = x"no"; then
AC_MSG_ERROR([c89 compiler support required])
fi
AM_PROG_AS

case $host_os in
*darwin*)

@@ -137,8 +138,8 @@ AC_ARG_WITH([bignum], [AS_HELP_STRING([--with-bignum=gmp|no|auto],
AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto],
[Specify scalar implementation. Default is auto])],[req_scalar=$withval], [req_scalar=auto])

-AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|no|auto]
-[Specify assembly optimizations to use. Default is auto])],[req_asm=$withval], [req_asm=auto])
+AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto]
+[Specify assembly optimizations to use. Default is auto (experimental: arm)])],[req_asm=$withval], [req_asm=auto])

AC_CHECK_TYPES([__int128])

@@ -165,6 +166,8 @@ else
AC_MSG_ERROR([x86_64 assembly optimization requested but not available])
fi
;;
arm)
;;
no)
;;
*)

@@ -257,10 +260,15 @@ else
fi

# select assembly optimization
use_external_asm=no

case $set_asm in
x86_64)
AC_DEFINE(USE_ASM_X86_64, 1, [Define this symbol to enable x86_64 assembly optimizations])
;;
arm)
use_external_asm=yes
;;
no)
;;
*)

@@ -371,6 +379,10 @@ fi

AC_C_BIGENDIAN()

if test x"$use_external_asm" = x"yes"; then
AC_DEFINE(USE_EXTERNAL_ASM, 1, [Define this symbol if an external (non-inline) assembly implementation is used])
fi

AC_MSG_NOTICE([Using assembly optimizations: $set_asm])
AC_MSG_NOTICE([Using field implementation: $set_field])
AC_MSG_NOTICE([Using bignum implementation: $set_bignum])

@@ -395,6 +407,9 @@ else
if test x"$enable_module_ecdh" = x"yes"; then
AC_MSG_ERROR([ECDH module is experimental. Use --enable-experimental to allow.])
fi
if test x"$set_asm" = x"arm"; then
AC_MSG_ERROR([ARM assembly optimization is experimental. Use --enable-experimental to allow.])
fi
fi

AC_CONFIG_HEADERS([src/libsecp256k1-config.h])

@@ -411,6 +426,8 @@ AM_CONDITIONAL([ENABLE_MODULE_ECDH], [test x"$enable_module_ecdh" = x"yes"])
AM_CONDITIONAL([ENABLE_MODULE_SCHNORR], [test x"$enable_module_schnorr" = x"yes"])
AM_CONDITIONAL([ENABLE_MODULE_RECOVERY], [test x"$enable_module_recovery" = x"yes"])
AM_CONDITIONAL([USE_JNI], [test x"$use_jni" == x"yes"])
AM_CONDITIONAL([USE_EXTERNAL_ASM], [test x"$use_external_asm" = x"yes"])
AM_CONDITIONAL([USE_ASM_ARM], [test x"$set_asm" = x"arm"])

dnl make sure nothing new is exported so that we don't break the cache
PKGCONFIG_PATH_TEMP="$PKG_CONFIG_PATH"
src/asm/field_10x26_arm.s (new file, 919 lines)

@@ -0,0 +1,919 @@
@ vim: set tabstop=8 softtabstop=8 shiftwidth=8 noexpandtab syntax=armasm:
/**********************************************************************
* Copyright (c) 2014 Wladimir J. van der Laan *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
/*
ARM implementation of field_10x26 inner loops.

Note:

- To avoid unnecessary loads and make use of available registers, two
'passes' have every time been interleaved, with the odd passes accumulating c' and d'
which will be added to c and d respectively in the even passes

*/
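As a reading aid, here is a hedged C sketch of the interleaving described above (mine, not part of the commit; the helper name is invented): the odd pass accumulates into c'/d' using the same a[]/b[] words the even pass has just loaded, and the even pass later folds those partial sums back in once the weights line up.

#include <stdint.h>

/* Hedged sketch of the pass interleaving; c/d are the even-pass accumulators,
 * d2 plays the role of d'. */
static uint64_t interleaved_d_demo(const uint32_t a[10], const uint32_t b[10],
                                   uint32_t *t9) {
    uint64_t d  = (uint64_t)a[0] * b[9];    /* even pass, first term          */
    uint64_t d2 = (uint64_t)a[1] * b[9];    /* odd pass reuses the b[9] load  */
    d  += (uint64_t)a[1] * b[8];            /* even pass reuses the a[1] load */
    d2 += (uint64_t)a[2] * b[8];
    /* ... remaining terms of both passes ... */
    *t9 = (uint32_t)(d & 0x3FFFFFF);        /* low 26 bits of this position   */
    d >>= 26;                               /* d now sits one weight higher   */
    d += d2;                                /* the later "d += d'" step       */
    return d;
}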
.syntax unified
.arch armv7-a
@ eabi attributes - see readelf -A
.eabi_attribute 8, 1 @ Tag_ARM_ISA_use = yes
.eabi_attribute 9, 0 @ Tag_Thumb_ISA_use = no
.eabi_attribute 10, 0 @ Tag_FP_arch = none
.eabi_attribute 24, 1 @ Tag_ABI_align_needed = 8-byte
.eabi_attribute 25, 1 @ Tag_ABI_align_preserved = 8-byte, except leaf SP
.eabi_attribute 30, 2 @ Tag_ABI_optimization_goals = Aggressive Speed
.eabi_attribute 34, 1 @ Tag_CPU_unaligned_access = v6
.text

@ Field constants
.set field_R0, 0x3d10
.set field_R1, 0x400
.set field_not_M, 0xfc000000 @ ~M = ~0x3ffffff
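A hedged note on these constants (my summary, not from the commit): with ten 26-bit limbs, an overflow out of the limb-9 position has weight 2^260, and modulo p = 2^256 - 0x1000003D1 that weight equals 0x1000003D10 = (field_R1 << 26) + field_R0. The carry chains below therefore fold a 26-bit overflow u back in by adding u * R0 at the current position and u * R1 one limb higher, as in this C sketch (helper name invented):

#include <stdint.h>

#define FIELD_M  0x3FFFFFFUL  /* 26-bit limb mask; field_not_M is its complement */
#define FIELD_R0 0x3D10UL
#define FIELD_R1 0x400UL
/* (FIELD_R1 << 26) + FIELD_R0 == 0x1000003D10 == 16 * (2^256 mod p) == 2^260 mod p */

/* One carry/reduce step as performed below with bic + umlal:
 * u is the 26-bit overflow pulled off the d accumulator. */
static uint32_t reduce_step_demo(uint64_t *c, uint32_t u) {
    *c += (uint64_t)u * FIELD_R0;           /* c += u * R0                  */
    uint32_t t = (uint32_t)(*c & FIELD_M);  /* t = c & M, the limb to store */
    *c >>= 26;                              /* c >>= 26                     */
    *c += (uint64_t)u * FIELD_R1;           /* c += u * R1                  */
    return t;
}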
.align 2
.global secp256k1_fe_mul_inner
.type secp256k1_fe_mul_inner, %function
@ Arguments:
@ r0 r Restrict: can overlap with a, not with b
@ r1 a
@ r2 b
@ Stack (total 4+10*4 = 44)
@ sp + #0 saved 'r' pointer
@ sp + #4 + 4*X t0,t1,t2,t3,t4,t5,t6,t7,u8,t9
secp256k1_fe_mul_inner:
stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}
sub sp, sp, #48 @ frame=44 + alignment
str r0, [sp, #0] @ save result address, we need it only at the end

/******************************************
* Main computation code.
******************************************

Allocation:
r0,r14,r7,r8 scratch
r1 a (pointer)
r2 b (pointer)
r3:r4 c
r5:r6 d
r11:r12 c'
r9:r10 d'

Note: do not write to r[] here, it may overlap with a[]
*/
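For reference, a hedged C transcription (mine, not from the commit; helper name invented) of what pass A below accumulates: d collects the product terms of weight 26*9 and d' those of weight 26*10, exactly as spelled out in the instruction comments that follow.

#include <stdint.h>

/* Pass A of the multiply in C form: coefficient 9 into d, coefficient 10
 * into d2 ("d'").  Each product is at most 2^52, so ten of them fit easily
 * in a 64-bit accumulator. */
static void mul_pass_a_demo(const uint32_t a[10], const uint32_t b[10],
                            uint64_t *d, uint64_t *d2) {
    *d = 0;
    *d2 = 0;
    for (int i = 0; i <= 9; i++) {
        *d += (uint64_t)a[i] * b[9 - i];    /* a[0]*b[9] ... a[9]*b[0] */
    }
    for (int i = 1; i <= 9; i++) {
        *d2 += (uint64_t)a[i] * b[10 - i];  /* a[1]*b[9] ... a[9]*b[1] */
    }
}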
/* A - interleaved with B */
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
ldr r8, [r2, #9*4] @ b[9]
|
||||
ldr r0, [r1, #1*4] @ a[1]
|
||||
umull r5, r6, r7, r8 @ d = a[0] * b[9]
|
||||
ldr r14, [r2, #8*4] @ b[8]
|
||||
umull r9, r10, r0, r8 @ d' = a[1] * b[9]
|
||||
ldr r7, [r1, #2*4] @ a[2]
|
||||
umlal r5, r6, r0, r14 @ d += a[1] * b[8]
|
||||
ldr r8, [r2, #7*4] @ b[7]
|
||||
umlal r9, r10, r7, r14 @ d' += a[2] * b[8]
|
||||
ldr r0, [r1, #3*4] @ a[3]
|
||||
umlal r5, r6, r7, r8 @ d += a[2] * b[7]
|
||||
ldr r14, [r2, #6*4] @ b[6]
|
||||
umlal r9, r10, r0, r8 @ d' += a[3] * b[7]
|
||||
ldr r7, [r1, #4*4] @ a[4]
|
||||
umlal r5, r6, r0, r14 @ d += a[3] * b[6]
|
||||
ldr r8, [r2, #5*4] @ b[5]
|
||||
umlal r9, r10, r7, r14 @ d' += a[4] * b[6]
|
||||
ldr r0, [r1, #5*4] @ a[5]
|
||||
umlal r5, r6, r7, r8 @ d += a[4] * b[5]
|
||||
ldr r14, [r2, #4*4] @ b[4]
|
||||
umlal r9, r10, r0, r8 @ d' += a[5] * b[5]
|
||||
ldr r7, [r1, #6*4] @ a[6]
|
||||
umlal r5, r6, r0, r14 @ d += a[5] * b[4]
|
||||
ldr r8, [r2, #3*4] @ b[3]
|
||||
umlal r9, r10, r7, r14 @ d' += a[6] * b[4]
|
||||
ldr r0, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r7, r8 @ d += a[6] * b[3]
|
||||
ldr r14, [r2, #2*4] @ b[2]
|
||||
umlal r9, r10, r0, r8 @ d' += a[7] * b[3]
|
||||
ldr r7, [r1, #8*4] @ a[8]
|
||||
umlal r5, r6, r0, r14 @ d += a[7] * b[2]
|
||||
ldr r8, [r2, #1*4] @ b[1]
|
||||
umlal r9, r10, r7, r14 @ d' += a[8] * b[2]
|
||||
ldr r0, [r1, #9*4] @ a[9]
|
||||
umlal r5, r6, r7, r8 @ d += a[8] * b[1]
|
||||
ldr r14, [r2, #0*4] @ b[0]
|
||||
umlal r9, r10, r0, r8 @ d' += a[9] * b[1]
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
umlal r5, r6, r0, r14 @ d += a[9] * b[0]
|
||||
@ r7,r14 used in B
|
||||
|
||||
bic r0, r5, field_not_M @ t9 = d & M
|
||||
str r0, [sp, #4 + 4*9]
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
|
||||
/* B */
|
||||
umull r3, r4, r7, r14 @ c = a[0] * b[0]
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u0 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u0 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t0 = c & M
|
||||
str r14, [sp, #4 + 0*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u0 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* C - interleaved with D */
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
ldr r8, [r2, #2*4] @ b[2]
|
||||
ldr r14, [r2, #1*4] @ b[1]
|
||||
umull r11, r12, r7, r8 @ c' = a[0] * b[2]
|
||||
ldr r0, [r1, #1*4] @ a[1]
|
||||
umlal r3, r4, r7, r14 @ c += a[0] * b[1]
|
||||
ldr r8, [r2, #0*4] @ b[0]
|
||||
umlal r11, r12, r0, r14 @ c' += a[1] * b[1]
|
||||
ldr r7, [r1, #2*4] @ a[2]
|
||||
umlal r3, r4, r0, r8 @ c += a[1] * b[0]
|
||||
ldr r14, [r2, #9*4] @ b[9]
|
||||
umlal r11, r12, r7, r8 @ c' += a[2] * b[0]
|
||||
ldr r0, [r1, #3*4] @ a[3]
|
||||
umlal r5, r6, r7, r14 @ d += a[2] * b[9]
|
||||
ldr r8, [r2, #8*4] @ b[8]
|
||||
umull r9, r10, r0, r14 @ d' = a[3] * b[9]
|
||||
ldr r7, [r1, #4*4] @ a[4]
|
||||
umlal r5, r6, r0, r8 @ d += a[3] * b[8]
|
||||
ldr r14, [r2, #7*4] @ b[7]
|
||||
umlal r9, r10, r7, r8 @ d' += a[4] * b[8]
|
||||
ldr r0, [r1, #5*4] @ a[5]
|
||||
umlal r5, r6, r7, r14 @ d += a[4] * b[7]
|
||||
ldr r8, [r2, #6*4] @ b[6]
|
||||
umlal r9, r10, r0, r14 @ d' += a[5] * b[7]
|
||||
ldr r7, [r1, #6*4] @ a[6]
|
||||
umlal r5, r6, r0, r8 @ d += a[5] * b[6]
|
||||
ldr r14, [r2, #5*4] @ b[5]
|
||||
umlal r9, r10, r7, r8 @ d' += a[6] * b[6]
|
||||
ldr r0, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r7, r14 @ d += a[6] * b[5]
|
||||
ldr r8, [r2, #4*4] @ b[4]
|
||||
umlal r9, r10, r0, r14 @ d' += a[7] * b[5]
|
||||
ldr r7, [r1, #8*4] @ a[8]
|
||||
umlal r5, r6, r0, r8 @ d += a[7] * b[4]
|
||||
ldr r14, [r2, #3*4] @ b[3]
|
||||
umlal r9, r10, r7, r8 @ d' += a[8] * b[4]
|
||||
ldr r0, [r1, #9*4] @ a[9]
|
||||
umlal r5, r6, r7, r14 @ d += a[8] * b[3]
|
||||
ldr r8, [r2, #2*4] @ b[2]
|
||||
umlal r9, r10, r0, r14 @ d' += a[9] * b[3]
|
||||
umlal r5, r6, r0, r8 @ d += a[9] * b[2]
|
||||
|
||||
bic r0, r5, field_not_M @ u1 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u1 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t1 = c & M
|
||||
str r14, [sp, #4 + 1*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u1 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* D */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u2 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u2 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t2 = c & M
|
||||
str r14, [sp, #4 + 2*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u2 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* E - interleaved with F */
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
ldr r8, [r2, #4*4] @ b[4]
|
||||
umull r11, r12, r7, r8 @ c' = a[0] * b[4]
|
||||
ldr r8, [r2, #3*4] @ b[3]
|
||||
umlal r3, r4, r7, r8 @ c += a[0] * b[3]
|
||||
ldr r7, [r1, #1*4] @ a[1]
|
||||
umlal r11, r12, r7, r8 @ c' += a[1] * b[3]
|
||||
ldr r8, [r2, #2*4] @ b[2]
|
||||
umlal r3, r4, r7, r8 @ c += a[1] * b[2]
|
||||
ldr r7, [r1, #2*4] @ a[2]
|
||||
umlal r11, r12, r7, r8 @ c' += a[2] * b[2]
|
||||
ldr r8, [r2, #1*4] @ b[1]
|
||||
umlal r3, r4, r7, r8 @ c += a[2] * b[1]
|
||||
ldr r7, [r1, #3*4] @ a[3]
|
||||
umlal r11, r12, r7, r8 @ c' += a[3] * b[1]
|
||||
ldr r8, [r2, #0*4] @ b[0]
|
||||
umlal r3, r4, r7, r8 @ c += a[3] * b[0]
|
||||
ldr r7, [r1, #4*4] @ a[4]
|
||||
umlal r11, r12, r7, r8 @ c' += a[4] * b[0]
|
||||
ldr r8, [r2, #9*4] @ b[9]
|
||||
umlal r5, r6, r7, r8 @ d += a[4] * b[9]
|
||||
ldr r7, [r1, #5*4] @ a[5]
|
||||
umull r9, r10, r7, r8 @ d' = a[5] * b[9]
|
||||
ldr r8, [r2, #8*4] @ b[8]
|
||||
umlal r5, r6, r7, r8 @ d += a[5] * b[8]
|
||||
ldr r7, [r1, #6*4] @ a[6]
|
||||
umlal r9, r10, r7, r8 @ d' += a[6] * b[8]
|
||||
ldr r8, [r2, #7*4] @ b[7]
|
||||
umlal r5, r6, r7, r8 @ d += a[6] * b[7]
|
||||
ldr r7, [r1, #7*4] @ a[7]
|
||||
umlal r9, r10, r7, r8 @ d' += a[7] * b[7]
|
||||
ldr r8, [r2, #6*4] @ b[6]
|
||||
umlal r5, r6, r7, r8 @ d += a[7] * b[6]
|
||||
ldr r7, [r1, #8*4] @ a[8]
|
||||
umlal r9, r10, r7, r8 @ d' += a[8] * b[6]
|
||||
ldr r8, [r2, #5*4] @ b[5]
|
||||
umlal r5, r6, r7, r8 @ d += a[8] * b[5]
|
||||
ldr r7, [r1, #9*4] @ a[9]
|
||||
umlal r9, r10, r7, r8 @ d' += a[9] * b[5]
|
||||
ldr r8, [r2, #4*4] @ b[4]
|
||||
umlal r5, r6, r7, r8 @ d += a[9] * b[4]
|
||||
|
||||
bic r0, r5, field_not_M @ u3 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u3 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t3 = c & M
|
||||
str r14, [sp, #4 + 3*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u3 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* F */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u4 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u4 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t4 = c & M
|
||||
str r14, [sp, #4 + 4*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u4 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* G - interleaved with H */
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
ldr r8, [r2, #6*4] @ b[6]
|
||||
ldr r14, [r2, #5*4] @ b[5]
|
||||
umull r11, r12, r7, r8 @ c' = a[0] * b[6]
|
||||
ldr r0, [r1, #1*4] @ a[1]
|
||||
umlal r3, r4, r7, r14 @ c += a[0] * b[5]
|
||||
ldr r8, [r2, #4*4] @ b[4]
|
||||
umlal r11, r12, r0, r14 @ c' += a[1] * b[5]
|
||||
ldr r7, [r1, #2*4] @ a[2]
|
||||
umlal r3, r4, r0, r8 @ c += a[1] * b[4]
|
||||
ldr r14, [r2, #3*4] @ b[3]
|
||||
umlal r11, r12, r7, r8 @ c' += a[2] * b[4]
|
||||
ldr r0, [r1, #3*4] @ a[3]
|
||||
umlal r3, r4, r7, r14 @ c += a[2] * b[3]
|
||||
ldr r8, [r2, #2*4] @ b[2]
|
||||
umlal r11, r12, r0, r14 @ c' += a[3] * b[3]
|
||||
ldr r7, [r1, #4*4] @ a[4]
|
||||
umlal r3, r4, r0, r8 @ c += a[3] * b[2]
|
||||
ldr r14, [r2, #1*4] @ b[1]
|
||||
umlal r11, r12, r7, r8 @ c' += a[4] * b[2]
|
||||
ldr r0, [r1, #5*4] @ a[5]
|
||||
umlal r3, r4, r7, r14 @ c += a[4] * b[1]
|
||||
ldr r8, [r2, #0*4] @ b[0]
|
||||
umlal r11, r12, r0, r14 @ c' += a[5] * b[1]
|
||||
ldr r7, [r1, #6*4] @ a[6]
|
||||
umlal r3, r4, r0, r8 @ c += a[5] * b[0]
|
||||
ldr r14, [r2, #9*4] @ b[9]
|
||||
umlal r11, r12, r7, r8 @ c' += a[6] * b[0]
|
||||
ldr r0, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r7, r14 @ d += a[6] * b[9]
|
||||
ldr r8, [r2, #8*4] @ b[8]
|
||||
umull r9, r10, r0, r14 @ d' = a[7] * b[9]
|
||||
ldr r7, [r1, #8*4] @ a[8]
|
||||
umlal r5, r6, r0, r8 @ d += a[7] * b[8]
|
||||
ldr r14, [r2, #7*4] @ b[7]
|
||||
umlal r9, r10, r7, r8 @ d' += a[8] * b[8]
|
||||
ldr r0, [r1, #9*4] @ a[9]
|
||||
umlal r5, r6, r7, r14 @ d += a[8] * b[7]
|
||||
ldr r8, [r2, #6*4] @ b[6]
|
||||
umlal r9, r10, r0, r14 @ d' += a[9] * b[7]
|
||||
umlal r5, r6, r0, r8 @ d += a[9] * b[6]
|
||||
|
||||
bic r0, r5, field_not_M @ u5 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u5 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t5 = c & M
|
||||
str r14, [sp, #4 + 5*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u5 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* H */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u6 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u6 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t6 = c & M
|
||||
str r14, [sp, #4 + 6*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u6 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* I - interleaved with J */
|
||||
ldr r8, [r2, #8*4] @ b[8]
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
ldr r14, [r2, #7*4] @ b[7]
|
||||
umull r11, r12, r7, r8 @ c' = a[0] * b[8]
|
||||
ldr r0, [r1, #1*4] @ a[1]
|
||||
umlal r3, r4, r7, r14 @ c += a[0] * b[7]
|
||||
ldr r8, [r2, #6*4] @ b[6]
|
||||
umlal r11, r12, r0, r14 @ c' += a[1] * b[7]
|
||||
ldr r7, [r1, #2*4] @ a[2]
|
||||
umlal r3, r4, r0, r8 @ c += a[1] * b[6]
|
||||
ldr r14, [r2, #5*4] @ b[5]
|
||||
umlal r11, r12, r7, r8 @ c' += a[2] * b[6]
|
||||
ldr r0, [r1, #3*4] @ a[3]
|
||||
umlal r3, r4, r7, r14 @ c += a[2] * b[5]
|
||||
ldr r8, [r2, #4*4] @ b[4]
|
||||
umlal r11, r12, r0, r14 @ c' += a[3] * b[5]
|
||||
ldr r7, [r1, #4*4] @ a[4]
|
||||
umlal r3, r4, r0, r8 @ c += a[3] * b[4]
|
||||
ldr r14, [r2, #3*4] @ b[3]
|
||||
umlal r11, r12, r7, r8 @ c' += a[4] * b[4]
|
||||
ldr r0, [r1, #5*4] @ a[5]
|
||||
umlal r3, r4, r7, r14 @ c += a[4] * b[3]
|
||||
ldr r8, [r2, #2*4] @ b[2]
|
||||
umlal r11, r12, r0, r14 @ c' += a[5] * b[3]
|
||||
ldr r7, [r1, #6*4] @ a[6]
|
||||
umlal r3, r4, r0, r8 @ c += a[5] * b[2]
|
||||
ldr r14, [r2, #1*4] @ b[1]
|
||||
umlal r11, r12, r7, r8 @ c' += a[6] * b[2]
|
||||
ldr r0, [r1, #7*4] @ a[7]
|
||||
umlal r3, r4, r7, r14 @ c += a[6] * b[1]
|
||||
ldr r8, [r2, #0*4] @ b[0]
|
||||
umlal r11, r12, r0, r14 @ c' += a[7] * b[1]
|
||||
ldr r7, [r1, #8*4] @ a[8]
|
||||
umlal r3, r4, r0, r8 @ c += a[7] * b[0]
|
||||
ldr r14, [r2, #9*4] @ b[9]
|
||||
umlal r11, r12, r7, r8 @ c' += a[8] * b[0]
|
||||
ldr r0, [r1, #9*4] @ a[9]
|
||||
umlal r5, r6, r7, r14 @ d += a[8] * b[9]
|
||||
ldr r8, [r2, #8*4] @ b[8]
|
||||
umull r9, r10, r0, r14 @ d' = a[9] * b[9]
|
||||
umlal r5, r6, r0, r8 @ d += a[9] * b[8]
|
||||
|
||||
bic r0, r5, field_not_M @ u7 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u7 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
bic r14, r3, field_not_M @ t7 = c & M
|
||||
str r14, [sp, #4 + 7*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u7 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* J */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u8 = d & M
|
||||
str r0, [sp, #4 + 8*4]
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u8 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/******************************************
* compute and write back result
******************************************
Allocation:
r0 r
r3:r4 c
r5:r6 d
r7 t0
r8 t1
r9 t2
r11 u8
r12 t9
r1,r2,r10,r14 scratch

Note: do not read from a[] after here, it may overlap with r[]
*/
ldr r0, [sp, #0]
|
||||
add r1, sp, #4 + 3*4 @ r[3..7] = t3..7, r11=u8, r12=t9
|
||||
ldmia r1, {r2,r7,r8,r9,r10,r11,r12}
|
||||
add r1, r0, #3*4
|
||||
stmia r1, {r2,r7,r8,r9,r10}
|
||||
|
||||
bic r2, r3, field_not_M @ r[8] = c & M
|
||||
str r2, [r0, #8*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u8 * R1
|
||||
umlal r3, r4, r11, r14
|
||||
movw r14, field_R0 @ c += d * R0
|
||||
umlal r3, r4, r5, r14
|
||||
adds r3, r3, r12 @ c += t9
|
||||
adc r4, r4, #0
|
||||
|
||||
add r1, sp, #4 + 0*4 @ r7,r8,r9 = t0,t1,t2
|
||||
ldmia r1, {r7,r8,r9}
|
||||
|
||||
ubfx r2, r3, #0, #22 @ r[9] = c & (M >> 4)
|
||||
str r2, [r0, #9*4]
|
||||
mov r3, r3, lsr #22 @ c >>= 22
|
||||
orr r3, r3, r4, asl #10
|
||||
mov r4, r4, lsr #22
|
||||
movw r14, field_R1 << 4 @ c += d * (R1 << 4)
|
||||
umlal r3, r4, r5, r14
|
||||
|
||||
movw r14, field_R0 >> 4 @ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
|
||||
umull r5, r6, r3, r14 @ d = c.lo * (R0 >> 4)
|
||||
adds r5, r5, r7 @ d.lo += t0
|
||||
mla r6, r14, r4, r6 @ d.hi += c.hi * (R0 >> 4)
|
||||
adc r6, r6, 0 @ d.hi += carry
|
||||
|
||||
bic r2, r5, field_not_M @ r[0] = d & M
|
||||
str r2, [r0, #0*4]
|
||||
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
|
||||
movw r14, field_R1 >> 4 @ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
|
||||
umull r1, r2, r3, r14 @ tmp = c.lo * (R1 >> 4)
|
||||
adds r5, r5, r8 @ d.lo += t1
|
||||
adc r6, r6, #0 @ d.hi += carry
|
||||
adds r5, r5, r1 @ d.lo += tmp.lo
|
||||
mla r2, r14, r4, r2 @ tmp.hi += c.hi * (R1 >> 4)
|
||||
adc r6, r6, r2 @ d.hi += carry + tmp.hi
|
||||
|
||||
bic r2, r5, field_not_M @ r[1] = d & M
|
||||
str r2, [r0, #1*4]
|
||||
mov r5, r5, lsr #26 @ d >>= 26 (ignore hi)
|
||||
orr r5, r5, r6, asl #6
|
||||
|
||||
add r5, r5, r9 @ d += t2
|
||||
str r5, [r0, #2*4] @ r[2] = d
|
||||
|
||||
add sp, sp, #48
ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.size secp256k1_fe_mul_inner, .-secp256k1_fe_mul_inner
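A hedged C sketch (mine, not from the commit; it omits the earlier d-folding steps) of why the write-back tail above switches to M >> 4, R0 >> 4 and R1 >> 4: limb 9 holds only 22 bits (9*26 + 22 = 256), so after r[9] is extracted the remaining carry sits at weight 2^256, and 2^256 mod p = 0x1000003D1 = (0x3D10 >> 4) + ((0x400 >> 4) << 26).

#include <stdint.h>

/* Simplified fold of the final carry, following the structure of the tail
 * above (t0, t1 are the saved low limbs; c is what remains after r[8]). */
static void final_fold_demo(uint32_t r[10], uint64_t c, uint32_t t0, uint32_t t1) {
    const uint64_t M = 0x3FFFFFFUL;
    r[9] = (uint32_t)(c & (M >> 4));        /* 22-bit top limb               */
    c >>= 22;                               /* leftover carry, weight 2^256  */
    uint64_t d = c * (0x3D10UL >> 4) + t0;  /* fold at weight 2^0  (R0 >> 4) */
    r[0] = (uint32_t)(d & M);
    d >>= 26;
    d += c * (0x400UL >> 4) + t1;           /* fold at weight 2^26 (R1 >> 4) */
    r[1] = (uint32_t)(d & M);
    /* the remainder of d is then added into r[2] together with t2 */
}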
.align 2
.global secp256k1_fe_sqr_inner
.type secp256k1_fe_sqr_inner, %function
@ Arguments:
@ r0 r Can overlap with a
@ r1 a
@ Stack (total 4+10*4 = 44)
@ sp + #0 saved 'r' pointer
@ sp + #4 + 4*X t0,t1,t2,t3,t4,t5,t6,t7,u8,t9
secp256k1_fe_sqr_inner:
stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}
sub sp, sp, #48 @ frame=44 + alignment
str r0, [sp, #0] @ save result address, we need it only at the end
/******************************************
* Main computation code.
******************************************

Allocation:
r0,r14,r2,r7,r8 scratch
r1 a (pointer)
r3:r4 c
r5:r6 d
r11:r12 c'
r9:r10 d'

Note: do not write to r[] here, it may overlap with a[]
*/
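A hedged C illustration (mine, not from the commit; helper name invented) of the squaring-specific trick visible in the passes below: each cross product a[i]*a[j] with i < j appears once with one operand pre-doubled (the `a[k]*2` loads), while the diagonal a[i]*a[i] terms stay single, roughly halving the number of multiplies compared with the general multiply.

#include <stdint.h>

/* Coefficient 9 of the square (the first d accumulator of pass A below):
 * only the i < j half of the terms is computed, with one factor doubled. */
static uint64_t sqr_term9_demo(const uint32_t a[10]) {
    uint64_t d;
    d  = (uint64_t)(a[0] * 2) * a[9];
    d += (uint64_t)(a[1] * 2) * a[8];
    d += (uint64_t)(a[2] * 2) * a[7];
    d += (uint64_t)(a[3] * 2) * a[6];
    d += (uint64_t)(a[4] * 2) * a[5];
    return d;
}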
/* A interleaved with B */
|
||||
ldr r0, [r1, #1*4] @ a[1]*2
|
||||
ldr r7, [r1, #0*4] @ a[0]
|
||||
mov r0, r0, asl #1
|
||||
ldr r14, [r1, #9*4] @ a[9]
|
||||
umull r3, r4, r7, r7 @ c = a[0] * a[0]
|
||||
ldr r8, [r1, #8*4] @ a[8]
|
||||
mov r7, r7, asl #1
|
||||
umull r5, r6, r7, r14 @ d = a[0]*2 * a[9]
|
||||
ldr r7, [r1, #2*4] @ a[2]*2
|
||||
umull r9, r10, r0, r14 @ d' = a[1]*2 * a[9]
|
||||
ldr r14, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r0, r8 @ d += a[1]*2 * a[8]
|
||||
mov r7, r7, asl #1
|
||||
ldr r0, [r1, #3*4] @ a[3]*2
|
||||
umlal r9, r10, r7, r8 @ d' += a[2]*2 * a[8]
|
||||
ldr r8, [r1, #6*4] @ a[6]
|
||||
umlal r5, r6, r7, r14 @ d += a[2]*2 * a[7]
|
||||
mov r0, r0, asl #1
|
||||
ldr r7, [r1, #4*4] @ a[4]*2
|
||||
umlal r9, r10, r0, r14 @ d' += a[3]*2 * a[7]
|
||||
ldr r14, [r1, #5*4] @ a[5]
|
||||
mov r7, r7, asl #1
|
||||
umlal r5, r6, r0, r8 @ d += a[3]*2 * a[6]
|
||||
umlal r9, r10, r7, r8 @ d' += a[4]*2 * a[6]
|
||||
umlal r5, r6, r7, r14 @ d += a[4]*2 * a[5]
|
||||
umlal r9, r10, r14, r14 @ d' += a[5] * a[5]
|
||||
|
||||
bic r0, r5, field_not_M @ t9 = d & M
|
||||
str r0, [sp, #4 + 9*4]
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
|
||||
/* B */
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u0 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u0 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t0 = c & M
|
||||
str r14, [sp, #4 + 0*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u0 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* C interleaved with D */
|
||||
ldr r0, [r1, #0*4] @ a[0]*2
|
||||
ldr r14, [r1, #1*4] @ a[1]
|
||||
mov r0, r0, asl #1
|
||||
ldr r8, [r1, #2*4] @ a[2]
|
||||
umlal r3, r4, r0, r14 @ c += a[0]*2 * a[1]
|
||||
mov r7, r8, asl #1 @ a[2]*2
|
||||
umull r11, r12, r14, r14 @ c' = a[1] * a[1]
|
||||
ldr r14, [r1, #9*4] @ a[9]
|
||||
umlal r11, r12, r0, r8 @ c' += a[0]*2 * a[2]
|
||||
ldr r0, [r1, #3*4] @ a[3]*2
|
||||
ldr r8, [r1, #8*4] @ a[8]
|
||||
umlal r5, r6, r7, r14 @ d += a[2]*2 * a[9]
|
||||
mov r0, r0, asl #1
|
||||
ldr r7, [r1, #4*4] @ a[4]*2
|
||||
umull r9, r10, r0, r14 @ d' = a[3]*2 * a[9]
|
||||
ldr r14, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r0, r8 @ d += a[3]*2 * a[8]
|
||||
mov r7, r7, asl #1
|
||||
ldr r0, [r1, #5*4] @ a[5]*2
|
||||
umlal r9, r10, r7, r8 @ d' += a[4]*2 * a[8]
|
||||
ldr r8, [r1, #6*4] @ a[6]
|
||||
mov r0, r0, asl #1
|
||||
umlal r5, r6, r7, r14 @ d += a[4]*2 * a[7]
|
||||
umlal r9, r10, r0, r14 @ d' += a[5]*2 * a[7]
|
||||
umlal r5, r6, r0, r8 @ d += a[5]*2 * a[6]
|
||||
umlal r9, r10, r8, r8 @ d' += a[6] * a[6]
|
||||
|
||||
bic r0, r5, field_not_M @ u1 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u1 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t1 = c & M
|
||||
str r14, [sp, #4 + 1*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u1 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* D */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u2 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u2 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t2 = c & M
|
||||
str r14, [sp, #4 + 2*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u2 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* E interleaved with F */
|
||||
ldr r7, [r1, #0*4] @ a[0]*2
|
||||
ldr r0, [r1, #1*4] @ a[1]*2
|
||||
ldr r14, [r1, #2*4] @ a[2]
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #3*4] @ a[3]
|
||||
ldr r2, [r1, #4*4]
|
||||
umlal r3, r4, r7, r8 @ c += a[0]*2 * a[3]
|
||||
mov r0, r0, asl #1
|
||||
umull r11, r12, r7, r2 @ c' = a[0]*2 * a[4]
|
||||
mov r2, r2, asl #1 @ a[4]*2
|
||||
umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[3]
|
||||
ldr r8, [r1, #9*4] @ a[9]
|
||||
umlal r3, r4, r0, r14 @ c += a[1]*2 * a[2]
|
||||
ldr r0, [r1, #5*4] @ a[5]*2
|
||||
umlal r11, r12, r14, r14 @ c' += a[2] * a[2]
|
||||
ldr r14, [r1, #8*4] @ a[8]
|
||||
mov r0, r0, asl #1
|
||||
umlal r5, r6, r2, r8 @ d += a[4]*2 * a[9]
|
||||
ldr r7, [r1, #6*4] @ a[6]*2
|
||||
umull r9, r10, r0, r8 @ d' = a[5]*2 * a[9]
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #7*4] @ a[7]
|
||||
umlal r5, r6, r0, r14 @ d += a[5]*2 * a[8]
|
||||
umlal r9, r10, r7, r14 @ d' += a[6]*2 * a[8]
|
||||
umlal r5, r6, r7, r8 @ d += a[6]*2 * a[7]
|
||||
umlal r9, r10, r8, r8 @ d' += a[7] * a[7]
|
||||
|
||||
bic r0, r5, field_not_M @ u3 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u3 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t3 = c & M
|
||||
str r14, [sp, #4 + 3*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u3 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* F */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u4 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u4 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t4 = c & M
|
||||
str r14, [sp, #4 + 4*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u4 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* G interleaved with H */
|
||||
ldr r7, [r1, #0*4] @ a[0]*2
|
||||
ldr r0, [r1, #1*4] @ a[1]*2
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #5*4] @ a[5]
|
||||
ldr r2, [r1, #6*4] @ a[6]
|
||||
umlal r3, r4, r7, r8 @ c += a[0]*2 * a[5]
|
||||
ldr r14, [r1, #4*4] @ a[4]
|
||||
mov r0, r0, asl #1
|
||||
umull r11, r12, r7, r2 @ c' = a[0]*2 * a[6]
|
||||
ldr r7, [r1, #2*4] @ a[2]*2
|
||||
umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[5]
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #3*4] @ a[3]
|
||||
umlal r3, r4, r0, r14 @ c += a[1]*2 * a[4]
|
||||
mov r0, r2, asl #1 @ a[6]*2
|
||||
umlal r11, r12, r7, r14 @ c' += a[2]*2 * a[4]
|
||||
ldr r14, [r1, #9*4] @ a[9]
|
||||
umlal r3, r4, r7, r8 @ c += a[2]*2 * a[3]
|
||||
ldr r7, [r1, #7*4] @ a[7]*2
|
||||
umlal r11, r12, r8, r8 @ c' += a[3] * a[3]
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #8*4] @ a[8]
|
||||
umlal r5, r6, r0, r14 @ d += a[6]*2 * a[9]
|
||||
umull r9, r10, r7, r14 @ d' = a[7]*2 * a[9]
|
||||
umlal r5, r6, r7, r8 @ d += a[7]*2 * a[8]
|
||||
umlal r9, r10, r8, r8 @ d' += a[8] * a[8]
|
||||
|
||||
bic r0, r5, field_not_M @ u5 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u5 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t5 = c & M
|
||||
str r14, [sp, #4 + 5*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u5 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* H */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
adds r5, r5, r9 @ d += d'
|
||||
adc r6, r6, r10
|
||||
|
||||
bic r0, r5, field_not_M @ u6 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u6 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t6 = c & M
|
||||
str r14, [sp, #4 + 6*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u6 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* I interleaved with J */
|
||||
ldr r7, [r1, #0*4] @ a[0]*2
|
||||
ldr r0, [r1, #1*4] @ a[1]*2
|
||||
mov r7, r7, asl #1
|
||||
ldr r8, [r1, #7*4] @ a[7]
|
||||
ldr r2, [r1, #8*4] @ a[8]
|
||||
umlal r3, r4, r7, r8 @ c += a[0]*2 * a[7]
|
||||
ldr r14, [r1, #6*4] @ a[6]
|
||||
mov r0, r0, asl #1
|
||||
umull r11, r12, r7, r2 @ c' = a[0]*2 * a[8]
|
||||
ldr r7, [r1, #2*4] @ a[2]*2
|
||||
umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[7]
|
||||
ldr r8, [r1, #5*4] @ a[5]
|
||||
umlal r3, r4, r0, r14 @ c += a[1]*2 * a[6]
|
||||
ldr r0, [r1, #3*4] @ a[3]*2
|
||||
mov r7, r7, asl #1
|
||||
umlal r11, r12, r7, r14 @ c' += a[2]*2 * a[6]
|
||||
ldr r14, [r1, #4*4] @ a[4]
|
||||
mov r0, r0, asl #1
|
||||
umlal r3, r4, r7, r8 @ c += a[2]*2 * a[5]
|
||||
mov r2, r2, asl #1 @ a[8]*2
|
||||
umlal r11, r12, r0, r8 @ c' += a[3]*2 * a[5]
|
||||
umlal r3, r4, r0, r14 @ c += a[3]*2 * a[4]
|
||||
umlal r11, r12, r14, r14 @ c' += a[4] * a[4]
|
||||
ldr r8, [r1, #9*4] @ a[9]
|
||||
umlal r5, r6, r2, r8 @ d += a[8]*2 * a[9]
|
||||
@ r8 will be used in J
|
||||
|
||||
bic r0, r5, field_not_M @ u7 = d & M
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u7 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
bic r14, r3, field_not_M @ t7 = c & M
|
||||
str r14, [sp, #4 + 7*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u7 * R1
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/* J */
|
||||
adds r3, r3, r11 @ c += c'
|
||||
adc r4, r4, r12
|
||||
umlal r5, r6, r8, r8 @ d += a[9] * a[9]
|
||||
|
||||
bic r0, r5, field_not_M @ u8 = d & M
|
||||
str r0, [sp, #4 + 8*4]
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
movw r14, field_R0 @ c += u8 * R0
|
||||
umlal r3, r4, r0, r14
|
||||
|
||||
/******************************************
* compute and write back result
******************************************
Allocation:
r0 r
r3:r4 c
r5:r6 d
r7 t0
r8 t1
r9 t2
r11 u8
r12 t9
r1,r2,r10,r14 scratch

Note: do not read from a[] after here, it may overlap with r[]
*/
ldr r0, [sp, #0]
|
||||
add r1, sp, #4 + 3*4 @ r[3..7] = t3..7, r11=u8, r12=t9
|
||||
ldmia r1, {r2,r7,r8,r9,r10,r11,r12}
|
||||
add r1, r0, #3*4
|
||||
stmia r1, {r2,r7,r8,r9,r10}
|
||||
|
||||
bic r2, r3, field_not_M @ r[8] = c & M
|
||||
str r2, [r0, #8*4]
|
||||
mov r3, r3, lsr #26 @ c >>= 26
|
||||
orr r3, r3, r4, asl #6
|
||||
mov r4, r4, lsr #26
|
||||
mov r14, field_R1 @ c += u8 * R1
|
||||
umlal r3, r4, r11, r14
|
||||
movw r14, field_R0 @ c += d * R0
|
||||
umlal r3, r4, r5, r14
|
||||
adds r3, r3, r12 @ c += t9
|
||||
adc r4, r4, #0
|
||||
|
||||
add r1, sp, #4 + 0*4 @ r7,r8,r9 = t0,t1,t2
|
||||
ldmia r1, {r7,r8,r9}
|
||||
|
||||
ubfx r2, r3, #0, #22 @ r[9] = c & (M >> 4)
|
||||
str r2, [r0, #9*4]
|
||||
mov r3, r3, lsr #22 @ c >>= 22
|
||||
orr r3, r3, r4, asl #10
|
||||
mov r4, r4, lsr #22
|
||||
movw r14, field_R1 << 4 @ c += d * (R1 << 4)
|
||||
umlal r3, r4, r5, r14
|
||||
|
||||
movw r14, field_R0 >> 4 @ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
|
||||
umull r5, r6, r3, r14 @ d = c.lo * (R0 >> 4)
|
||||
adds r5, r5, r7 @ d.lo += t0
|
||||
mla r6, r14, r4, r6 @ d.hi += c.hi * (R0 >> 4)
|
||||
adc r6, r6, 0 @ d.hi += carry
|
||||
|
||||
bic r2, r5, field_not_M @ r[0] = d & M
|
||||
str r2, [r0, #0*4]
|
||||
|
||||
mov r5, r5, lsr #26 @ d >>= 26
|
||||
orr r5, r5, r6, asl #6
|
||||
mov r6, r6, lsr #26
|
||||
|
||||
movw r14, field_R1 >> 4 @ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
|
||||
umull r1, r2, r3, r14 @ tmp = c.lo * (R1 >> 4)
|
||||
adds r5, r5, r8 @ d.lo += t1
|
||||
adc r6, r6, #0 @ d.hi += carry
|
||||
adds r5, r5, r1 @ d.lo += tmp.lo
|
||||
mla r2, r14, r4, r2 @ tmp.hi += c.hi * (R1 >> 4)
|
||||
adc r6, r6, r2 @ d.hi += carry + tmp.hi
|
||||
|
||||
bic r2, r5, field_not_M @ r[1] = d & M
|
||||
str r2, [r0, #1*4]
|
||||
mov r5, r5, lsr #26 @ d >>= 26 (ignore hi)
|
||||
orr r5, r5, r6, asl #6
|
||||
|
||||
add r5, r5, r9 @ d += t2
|
||||
str r5, [r0, #2*4] @ r[2] = d
|
||||
|
||||
add sp, sp, #48
ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.size secp256k1_fe_sqr_inner, .-secp256k1_fe_sqr_inner
src/field_10x26_impl.h

@@ -429,6 +429,14 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_f
#endif
}

#if defined(USE_EXTERNAL_ASM)

/* External assembler implementation */
void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);

#else

#ifdef VERIFY
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#else

@@ -1037,7 +1045,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
VERIFY_BITS(r[2], 27);
/* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}

#endif

static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
#ifdef VERIFY
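To show how the declarations above are consumed, here is a hedged, simplified sketch (not verbatim library code; the `fe` type and wrapper names are stand-ins): with USE_EXTERNAL_ASM defined, the C wrappers hand the 10x26 limb arrays straight to the assembly routines, and the 32-bit field representation keeps its uint32_t n[10] limbs either way.

#include <stdint.h>

/* "fe" stands in for secp256k1_fe in this sketch. */
typedef struct { uint32_t n[10]; } fe;

/* Provided by src/asm/field_10x26_arm.s when USE_EXTERNAL_ASM is set. */
void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t *b);
void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);

static void fe_mul(fe *r, const fe *a, const fe *b) {
    secp256k1_fe_mul_inner(r->n, a->n, b->n);
}

static void fe_sqr(fe *r, const fe *a) {
    secp256k1_fe_sqr_inner(r->n, a->n);
}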
src/modules/ecdh/Makefile.am.include

@@ -4,5 +4,5 @@ noinst_HEADERS += src/modules/ecdh/tests_impl.h
if USE_BENCHMARK
noinst_PROGRAMS += bench_ecdh
bench_ecdh_SOURCES = src/bench_ecdh.c
-bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS) $(COMMON_LIB)
endif
src/modules/recovery/Makefile.am.include

@@ -4,5 +4,5 @@ noinst_HEADERS += src/modules/recovery/tests_impl.h
if USE_BENCHMARK
noinst_PROGRAMS += bench_recover
bench_recover_SOURCES = src/bench_recover.c
-bench_recover_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_recover_LDADD = libsecp256k1.la $(SECP_LIBS) $(COMMON_LIB)
endif
src/modules/schnorr/Makefile.am.include

@@ -6,5 +6,5 @@ noinst_HEADERS += src/modules/schnorr/tests_impl.h
if USE_BENCHMARK
noinst_PROGRAMS += bench_schnorr_verify
bench_schnorr_verify_SOURCES = src/bench_schnorr_verify.c
-bench_schnorr_verify_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_schnorr_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(COMMON_LIB)
endif