diff --git a/Makefile b/Makefile index af55d85726a..f18ca434fdc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -FLAGS_COMMON:=-Wall +FLAGS_COMMON:=-Wall -Wno-unused FLAGS_PROD:=-DNDEBUG -O2 -march=native FLAGS_DEBUG:=-DVERIFY -ggdb3 -O1 FLAGS_TEST:=-DVERIFY -ggdb3 -O2 -march=native @@ -15,20 +15,22 @@ default: all ifeq ($(CONF), openssl) FLAGS_CONF:=-DUSE_NUM_OPENSSL -DUSE_FIELDINVERSE_BUILTIN LIBS:=-lcrypto -SECP256K1_FILES := $(SECP256K1_FILES) num_openssl.h num_openssl.cpp +SECP256K1_FILES := $(SECP256K1_FILES) num_openssl.h num_openssl.cpp field_5x52_int128.cpp else ifeq ($(CONF), gmp) FLAGS_CONF:=-DUSE_NUM_GMP LIBS:=-lgmp -SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp +SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp field_5x52_int128.cpp else ifeq ($(CONF), gmpasm) FLAGS_CONF:=-DUSE_NUM_GMP -DINLINE_ASM -LIBS:=-lgmp obj/lin64.o -SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp obj/lin64.o +LIBS:=-lgmp obj/field_5x52_asm.o +SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp field_5x52_asm.cpp obj/field_5x52_asm.o -obj/lin64.o: lin64.asm - yasm -f elf64 -o obj/lin64.o lin64.asm +obj/field_5x52_asm.o: field_5x52_asm.asm + yasm -f elf64 -o obj/field_5x52_asm.o field_5x52_asm.asm +else +SECP256K1_FILES := $(SECP256K1_FILES) field_5x52_int128.cpp endif endif endif diff --git a/field_5x52.cpp b/field_5x52.cpp index f86b6b44ec2..acc44444327 100644 --- a/field_5x52.cpp +++ b/field_5x52.cpp @@ -4,7 +4,9 @@ #include "field.h" #ifdef INLINE_ASM -#include "lin64.h" +#include "field_5x52_asm.cpp" +#else +#include "field_5x52_int128.cpp" #endif extern "C" { @@ -165,119 +167,19 @@ void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s #ifdef VERIFY assert(a->magnitude <= 8); assert(b->magnitude <= 8); -#endif - -#ifdef INLINE_ASM - ExSetMult((uint64_t*)a->n, (uint64_t*)b->n, (uint64_t*)r->n); -#else - unsigned __int128 c = (__int128)a->n[0] * b->n[0]; - uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 
52; // c max 0FFFFFFFFFFFFFE0 - c = c + (__int128)a->n[0] * b->n[1] + - (__int128)a->n[1] * b->n[0]; - uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF - c = c + (__int128)a->n[0] * b->n[2] + - (__int128)a->n[1] * b->n[1] + - (__int128)a->n[2] * b->n[0]; - uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0 - c = c + (__int128)a->n[0] * b->n[3] + - (__int128)a->n[1] * b->n[2] + - (__int128)a->n[2] * b->n[1] + - (__int128)a->n[3] * b->n[0]; - uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280 - c = c + (__int128)a->n[0] * b->n[4] + - (__int128)a->n[1] * b->n[3] + - (__int128)a->n[2] * b->n[2] + - (__int128)a->n[3] * b->n[1] + - (__int128)a->n[4] * b->n[0]; - uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E - c = c + (__int128)a->n[1] * b->n[4] + - (__int128)a->n[2] * b->n[3] + - (__int128)a->n[3] * b->n[2] + - (__int128)a->n[4] * b->n[1]; - uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE - c = c + (__int128)a->n[2] * b->n[4] + - (__int128)a->n[3] * b->n[3] + - (__int128)a->n[4] * b->n[2]; - uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE - c = c + (__int128)a->n[3] * b->n[4] + - (__int128)a->n[4] * b->n[3]; - uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE - c = c + (__int128)a->n[4] * b->n[4]; - uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E - uint64_t t9 = c; - - c = t0 + (__int128)t5 * 0x1000003D10ULL; - t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t1 + (__int128)t6 * 0x1000003D10ULL; - t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t2 + (__int128)t7 * 0x1000003D10ULL; - r->n[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t3 + (__int128)t8 * 0x1000003D10ULL; - r->n[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t4 + 
(__int128)t9 * 0x1000003D10ULL; - r->n[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110 - c = t0 + (__int128)c * 0x1000003D1ULL; - r->n[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008 - r->n[1] = t1 + c; -#endif -#ifdef VERIFY r->magnitude = 1; r->normalized = 0; #endif + secp256k1_fe_mul_inner(a->n, b->n, r->n); } void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { #ifdef VERIFY assert(a->magnitude <= 8); -#endif - -#ifdef INLINE_ASM - ExSetSquare((uint64_t*)&a->n, (uint64_t*)&r->n); -#else - __int128 c = (__int128)a->n[0] * a->n[0]; - uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0 - c = c + (__int128)(a->n[0]*2) * a->n[1]; - uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF - c = c + (__int128)(a->n[0]*2) * a->n[2] + - (__int128)a->n[1] * a->n[1]; - uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0 - c = c + (__int128)(a->n[0]*2) * a->n[3] + - (__int128)(a->n[1]*2) * a->n[2]; - uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280 - c = c + (__int128)(a->n[0]*2) * a->n[4] + - (__int128)(a->n[1]*2) * a->n[3] + - (__int128)a->n[2] * a->n[2]; - uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E - c = c + (__int128)(a->n[1]*2) * a->n[4] + - (__int128)(a->n[2]*2) * a->n[3]; - uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE - c = c + (__int128)(a->n[2]*2) * a->n[4] + - (__int128)a->n[3] * a->n[3]; - uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE - c = c + (__int128)(a->n[3]*2) * a->n[4]; - uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE - c = c + (__int128)a->n[4] * a->n[4]; - uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E - uint64_t t9 = c; - c = t0 + (__int128)t5 * 0x1000003D10ULL; - t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t1 + 
(__int128)t6 * 0x1000003D10ULL; - t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t2 + (__int128)t7 * 0x1000003D10ULL; - r->n[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t3 + (__int128)t8 * 0x1000003D10ULL; - r->n[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 - c = c + t4 + (__int128)t9 * 0x1000003D10ULL; - r->n[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110 - c = t0 + (__int128)c * 0x1000003D1ULL; - r->n[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008 - r->n[1] = t1 + c; -#endif - -#ifdef VERIFY r->magnitude = 1; r->normalized = 0; #endif + secp256k1_fe_sqr_inner(a->n, r->n); } } diff --git a/lin64.asm b/field_5x52_asm.asm similarity index 96% rename from lin64.asm rename to field_5x52_asm.asm index cba02db45af..ef1c1c9b52e 100644 --- a/lin64.asm +++ b/field_5x52_asm.asm @@ -1,10 +1,10 @@ ;; Added by Diederik Huys, March 2013 ;; ;; Provided public procedures: - ;; ExSetMult - ;; ExSetSquare + ;; secp256k1_fe_mul_inner + ;; secp256k1_fe_sqr_inner ;; - ;; Needed tools: YASM (http://www.japheth.de/JWasm.html) + ;; Needed tools: YASM (http://yasm.tortall.net) ;; ;; @@ -12,9 +12,9 @@ ;; Procedure ExSetMult ;; Register Layout: - ;; INPUT: rdi = a.n - ;; rsi = b.n - ;; rdx = this.a + ;; INPUT: rdi = a->n + ;; rsi = b->n + ;; rdx = r->n ;; ;; INTERNAL: rdx:rax = multiplication accumulator ;; r9:r8 = c @@ -26,9 +26,9 @@ ;; rbp = Constant 0FFFFFFFFFFFFFh / t8 ;; rsi = b.n / b.n[4] / t9 - GLOBAL ExSetMult + GLOBAL secp256k1_fe_mul_inner ALIGN 32 -ExSetMult: +secp256k1_fe_mul_inner: push rbp push rbx push r12 @@ -315,9 +315,9 @@ common_exit_norm: ;; rcx = a.n[3] / t7 ;; rbp = 0FFFFFFFFFFFFFh / t8 ;; rsi = a.n[4] / a.n[4] /t9 - GLOBAL ExSetSquare + GLOBAL secp256k1_fe_sqr_inner ALIGN 32 -ExSetSquare: +secp256k1_fe_sqr_inner: push rbp push rbx push r12 diff --git a/field_5x52_asm.cpp b/field_5x52_asm.cpp new file mode 100644 index 00000000000..78b5a28d965 --- 
/dev/null +++ b/field_5x52_asm.cpp @@ -0,0 +1,2 @@ +extern "C" void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r); +extern "C" void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r); diff --git a/field_5x52_int128.cpp b/field_5x52_int128.cpp new file mode 100644 index 00000000000..b04fd62e419 --- /dev/null +++ b/field_5x52_int128.cpp @@ -0,0 +1,100 @@ +#include "field.h" + +extern "C" { + +void static inline secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) { + unsigned __int128 c = (__int128)a[0] * b[0]; + uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0 + c = c + (__int128)a[0] * b[1] + + (__int128)a[1] * b[0]; + uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF + c = c + (__int128)a[0] * b[2] + + (__int128)a[1] * b[1] + + (__int128)a[2] * b[0]; + uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0 + c = c + (__int128)a[0] * b[3] + + (__int128)a[1] * b[2] + + (__int128)a[2] * b[1] + + (__int128)a[3] * b[0]; + uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280 + c = c + (__int128)a[0] * b[4] + + (__int128)a[1] * b[3] + + (__int128)a[2] * b[2] + + (__int128)a[3] * b[1] + + (__int128)a[4] * b[0]; + uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E + c = c + (__int128)a[1] * b[4] + + (__int128)a[2] * b[3] + + (__int128)a[3] * b[2] + + (__int128)a[4] * b[1]; + uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE + c = c + (__int128)a[2] * b[4] + + (__int128)a[3] * b[3] + + (__int128)a[4] * b[2]; + uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE + c = c + (__int128)a[3] * b[4] + + (__int128)a[4] * b[3]; + uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE + c = c + (__int128)a[4] * b[4]; + uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 
001000000000001E + uint64_t t9 = c; + + c = t0 + (__int128)t5 * 0x1000003D10ULL; + t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t1 + (__int128)t6 * 0x1000003D10ULL; + t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t2 + (__int128)t7 * 0x1000003D10ULL; + r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t3 + (__int128)t8 * 0x1000003D10ULL; + r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t4 + (__int128)t9 * 0x1000003D10ULL; + r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110 + c = t0 + (__int128)c * 0x1000003D1ULL; + r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008 + r[1] = t1 + c; + +} + +void static inline secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) { + __int128 c = (__int128)a[0] * a[0]; + uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0 + c = c + (__int128)(a[0]*2) * a[1]; + uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF + c = c + (__int128)(a[0]*2) * a[2] + + (__int128)a[1] * a[1]; + uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0 + c = c + (__int128)(a[0]*2) * a[3] + + (__int128)(a[1]*2) * a[2]; + uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280 + c = c + (__int128)(a[0]*2) * a[4] + + (__int128)(a[1]*2) * a[3] + + (__int128)a[2] * a[2]; + uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E + c = c + (__int128)(a[1]*2) * a[4] + + (__int128)(a[2]*2) * a[3]; + uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE + c = c + (__int128)(a[2]*2) * a[4] + + (__int128)a[3] * a[3]; + uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE + c = c + (__int128)(a[3]*2) * a[4]; + uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE + c = c + (__int128)a[4] * a[4]; + uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; 
c = c >> 52; // c max 001000000000001E + uint64_t t9 = c; + c = t0 + (__int128)t5 * 0x1000003D10ULL; + t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t1 + (__int128)t6 * 0x1000003D10ULL; + t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t2 + (__int128)t7 * 0x1000003D10ULL; + r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t3 + (__int128)t8 * 0x1000003D10ULL; + r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10 + c = c + t4 + (__int128)t9 * 0x1000003D10ULL; + r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110 + c = t0 + (__int128)c * 0x1000003D1ULL; + r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008 + r[1] = t1 + c; + +} + +} diff --git a/lin64.h b/lin64.h deleted file mode 100644 index b5c53ac50f9..00000000000 --- a/lin64.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _SECP256K1_LIN64 -#define _SECP256K1_LIN64 - -#ifdef INLINE_ASM -extern "C" void __attribute__ ((sysv_abi)) ExSetMult(uint64_t *, uint64_t *, uint64_t *); -extern "C" void __attribute__ ((sysv_abi)) ExSetSquare(uint64_t *, uint64_t *); -#endif - -#endif diff --git a/num_openssl_.cpp b/num_openssl_.cpp new file mode 100644 index 00000000000..cd2673ea53a --- /dev/null +++ b/num_openssl_.cpp @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include + +#include "num_openssl.h" + +namespace secp256k1 { + +class Context { +private: + BN_CTX *ctx; + + operator BN_CTX*() { + return ctx; + } + + friend class Number; +public: + Context() { + ctx = BN_CTX_new(); + } + + ~Context() { + BN_CTX_free(ctx); + } +}; + +Number::operator const BIGNUM*() const { + return &b; +} + +Number::operator BIGNUM*() { + return &b; +} + +Number::Number() { + BN_init(*this); +} + +Number::~Number() { + BN_free(*this); +} + +Number::Number(const unsigned char *bin, int len) { + BN_init(*this); + SetBytes(bin,len); +} + +void Number::SetNumber(const Number &x) { + BN_copy(*this, x); +} + 
+Number::Number(const Number &x) { + BN_init(*this); + BN_copy(*this, x); +} + +Number &Number::operator=(const Number &x) { + BN_copy(*this, x); + return *this; +} + +void Number::SetBytes(const unsigned char *bin, int len) { + BN_bin2bn(bin, len, *this); +} + +void Number::GetBytes(unsigned char *bin, int len) { + int size = BN_num_bytes(*this); + assert(size <= len); + memset(bin,0,len); + BN_bn2bin(*this, bin + len - size); +} + +void Number::SetInt(int x) { + if (x >= 0) { + BN_set_word(*this, x); + } else { + BN_set_word(*this, -x); + BN_set_negative(*this, 1); + } +} + +void Number::SetModInverse(const Number &x, const Number &m) { + Context ctx; + BN_mod_inverse(*this, x, m, ctx); +} + +void Number::SetModMul(const Number &a, const Number &b, const Number &m) { + Context ctx; + BN_mod_mul(*this, a, b, m, ctx); +} + +void Number::SetAdd(const Number &a1, const Number &a2) { + BN_add(*this, a1, a2); +} + +void Number::SetSub(const Number &a1, const Number &a2) { + BN_sub(*this, a1, a2); +} + +void Number::SetMult(const Number &a1, const Number &a2) { + Context ctx; + BN_mul(*this, a1, a2, ctx); +} + +void Number::SetDiv(const Number &a1, const Number &a2) { + Context ctx; + BN_div(*this, NULL, a1, a2, ctx); +} + +void Number::SetMod(const Number &a, const Number &m) { + Context ctx; + BN_nnmod(*this, a, m, ctx); +} + +int Number::Compare(const Number &a) const { + return BN_cmp(*this, a); +} + +int Number::GetBits() const { + return BN_num_bits(*this); +} + +int Number::ShiftLowBits(int bits) { + BIGNUM *bn = *this; + int ret = BN_is_zero(bn) ? 
0 : bn->d[0] & ((1 << bits) - 1); + BN_rshift(*this, *this, bits); + return ret; +} + +bool Number::IsZero() const { + return BN_is_zero((const BIGNUM*)*this); +} + +bool Number::IsOdd() const { + return BN_is_odd((const BIGNUM*)*this); +} + +bool Number::CheckBit(int pos) const { + return BN_is_bit_set((const BIGNUM*)*this, pos); +} + +bool Number::IsNeg() const { + return BN_is_negative((const BIGNUM*)*this); +} + +void Number::Negate() { + BN_set_negative(*this, !IsNeg()); +} + +void Number::Shift1() { + BN_rshift1(*this,*this); +} + +void Number::Inc() { + BN_add_word(*this,1); +} + +void Number::SetHex(const std::string &str) { + BIGNUM *bn = *this; + BN_hex2bn(&bn, str.c_str()); +} + +void Number::SetPseudoRand(const Number &max) { + BN_pseudo_rand_range(*this, max); +} + +void Number::SplitInto(int bits, Number &low, Number &high) const { + BN_copy(low, *this); + BN_mask_bits(low, bits); + BN_rshift(high, *this, bits); +} + +std::string Number::ToString() const { + char *str = BN_bn2hex(*this); + std::string ret(str); + OPENSSL_free(str); + return ret; +} + +}