Skip to content

Commit

Permalink
[EC] Use s2n-bignum's modular inversion for P-256/384/521 (#2057)
Browse files Browse the repository at this point in the history
Use s2n-bignum's inversion modulo the field characteristic
for curves P-256/384/521. This gives us the following
performance improvements:
```
_____Apple M1____| before |  after | speedup |
P-256 ECDH       |  22724 |  23419 |   1.03x |
P-256 ECDSA sign |  60677 |  69731 |   1.15x |
P-384 ECDH       |   5863 |   6217 |   1.06x |
P-384 ECDSA sign |  13232 |  15011 |   1.13x |
P-521 ECDH       |   4041 |   4163 |   1.03x |
P-521 ECDSA sign |   7079 |   7584 |   1.07x |

______x86_64_____| before |  after | speedup |
P-256 ECDH       |  19410 |  20408 |   1.05x |
P-256 ECDSA sign |  54477 |  63617 |   1.17x |
P-384 ECDH       |   5309 |   5599 |   1.05x |
P-384 ECDSA sign |  12087 |  13780 |   1.14x |
P-521 ECDH       |   3539 |   3677 |   1.04x |
P-521 ECDSA sign |   6584 |   7068 |   1.07x |

_______GV4_______| before |  after | speedup |
P-256 ECDH       |  16642 |  17491 |   1.05x |
P-256 ECDSA sign |  51527 |  61108 |   1.18x |
P-384 ECDH       |   4208 |   4453 |   1.06x |
P-384 ECDSA sign |   9848 |  11308 |   1.15x |
P-521 ECDH       |   2668 |   2811 |   1.05x |
P-521 ECDSA sign |   5092 |   5626 |   1.10x |
```
  • Loading branch information
dkostic authored Dec 17, 2024
1 parent b090db7 commit 51ae4b1
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 12 deletions.
3 changes: 3 additions & 0 deletions crypto/fipsmodule/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR

p256/p256_montjscalarmul.S
p256/p256_montjscalarmul_alt.S
p256/bignum_montinv_p256.S

p384/bignum_add_p384.S
p384/bignum_sub_p384.S
Expand All @@ -223,6 +224,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
p384/p384_montjdouble_alt.S
p384/p384_montjscalarmul.S
p384/p384_montjscalarmul_alt.S
p384/bignum_montinv_p384.S

p521/bignum_add_p521.S
p521/bignum_sub_p521.S
Expand All @@ -237,6 +239,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
p521/p521_jdouble_alt.S
p521/p521_jscalarmul.S
p521/p521_jscalarmul_alt.S
p521/bignum_inv_p521.S

curve25519/bignum_mod_n25519.S
curve25519/bignum_neg_p25519.S
Expand Down
6 changes: 6 additions & 0 deletions crypto/fipsmodule/ec/p256-nistz.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ static BN_ULONG is_not_zero(BN_ULONG in) {
// the Montgomery domain.
static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],
const BN_ULONG in[P256_LIMBS]) {
#if defined(EC_NISTP_USE_S2N_BIGNUM)
ec_nistp_felem_limb in_sqr[P256_LIMBS];
ecp_nistz256_sqr_mont(in_sqr, in);
bignum_montinv_p256(r, in_sqr);
#else
// This implements the addition chain described in
// https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
BN_ULONG x2[P256_LIMBS], x3[P256_LIMBS], x6[P256_LIMBS], x12[P256_LIMBS],
Expand Down Expand Up @@ -188,6 +193,7 @@ static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],

ecp_nistz256_sqr_mont(ret, ret);
ecp_nistz256_sqr_mont(r, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
#endif
}

// r = p * p_scalar
Expand Down
6 changes: 6 additions & 0 deletions crypto/fipsmodule/ec/p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ static void p384_from_scalar(p384_felem out, const EC_SCALAR *in) {
// ffffffff 00000000 00000000 fffffffc
static void p384_inv_square(p384_felem out,
const p384_felem in) {
#if defined(EC_NISTP_USE_S2N_BIGNUM)
ec_nistp_felem_limb in_sqr[P384_NLIMBS];
p384_felem_sqr(in_sqr, in);
bignum_montinv_p384(out, in_sqr);
#else
// This implements the addition chain described in
// https://briansmith.org/ecc-inversion-addition-chains-01#p384_field_inversion
// The side comments show the value of the exponent:
Expand Down Expand Up @@ -222,6 +227,7 @@ static void p384_inv_square(p384_felem out,

p384_felem_sqr(ret, ret);
p384_felem_sqr(out, ret); // 2^384 - 2^128 - 2^96 + 2^32 - 2^2 = p - 3
#endif
}

static void p384_point_double(p384_felem x_out,
Expand Down
4 changes: 4 additions & 0 deletions crypto/fipsmodule/ec/p521.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ static void p521_to_generic(EC_FELEM *out, const p521_felem in) {
// The code is autogenerated by the ECCKiila project:
// https://arxiv.org/abs/2007.11481
static void p521_felem_inv(p521_felem output, const p521_felem t1) {
#if defined(EC_NISTP_USE_S2N_BIGNUM)
bignum_inv_p521(output, t1);
#else
/* temporary variables */
p521_felem acc, t2, t4, t8, t16, t32, t64;
p521_felem t128, t256, t512, t516, t518, t519;
Expand Down Expand Up @@ -240,6 +243,7 @@ static void p521_felem_inv(p521_felem output, const p521_felem t1) {
p521_felem_sqr(acc, t519);
p521_felem_sqr(acc, acc);
p521_felem_mul(output, acc, t1);
#endif
}

static void p521_point_double(p521_felem x_out,
Expand Down
4 changes: 2 additions & 2 deletions third_party/s2n-bignum/arm/curve25519/bignum_mod_n25519.S
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ S2N_BN_SYMBOL(bignum_mod_n25519):

cbz k, writeback

loop:
bignum_mod_n25519_loop:

// Assume that the new 5-digit x is 2^64 * previous_x + next_digit.
// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1)
Expand Down Expand Up @@ -154,7 +154,7 @@ loop:
adcs m2, t2, xzr
adc m3, t3, m3

cbnz k, loop
cbnz k, bignum_mod_n25519_loop

// Finally write back [m3;m2;m1;m0] and return

Expand Down
8 changes: 4 additions & 4 deletions third_party/s2n-bignum/arm/p521/bignum_inv_p521.S
Original file line number Diff line number Diff line change
Expand Up @@ -789,9 +789,9 @@ S2N_BN_SYMBOL(bignum_inv_p521):

mov i, #21
mov d, #1
b midloop
b bignum_inv_p521_midloop

loop:
bignum_inv_p521_loop:

// Separate the matrix elements into sign-magnitude pairs

Expand Down Expand Up @@ -1424,7 +1424,7 @@ loop:
adc x2, x2, x6
str x2, [v+64]

midloop:
bignum_inv_p521_midloop:

mov x1, d
ldr x2, [f]
Expand All @@ -1435,7 +1435,7 @@ midloop:
// Next iteration

subs i, i, #1
bne loop
bne bignum_inv_p521_loop

// The 21st and last iteration does not need anything except the
// u value and the sign of f; the latter can be obtained from the
Expand Down
15 changes: 15 additions & 0 deletions third_party/s2n-bignum/include/s2n-bignum_aws-lc.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ static inline void p256_montjscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 1
else { p256_montjscalarmul(res, scalar, point); }
}

// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1
// z = x^-1 mod p_256.
// The function is constant-time.
extern void bignum_montinv_p256(uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);

// Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced
// Inputs x[6], y[6]; output z[6]
extern void bignum_add_p384(uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
Expand Down Expand Up @@ -124,6 +129,11 @@ static inline void p384_montjscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 1
else { p384_montjscalarmul(res, scalar, point); }
}

// Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1
// z = x^-1 mod p_384.
// The function is constant-time.
extern void bignum_montinv_p384(uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);

// Convert 6-digit (384-bit) bignum from little-endian form
// Input x[6]; output z[6]
extern void bignum_fromlebytes_6(uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]);
Expand Down Expand Up @@ -185,6 +195,11 @@ static inline void p521_jscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 27],
else { p521_jscalarmul(res, scalar, point); }
}

// Modular inverse modulo p_521 = 2^521 - 1
// z = x^-1 mod p_521.
// The function is constant-time.
extern void bignum_inv_p521(uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);

// curve25519_x25519_byte and curve25519_x25519_byte_alt computes the x25519
// function specified in https://www.rfc-editor.org/rfc/rfc7748. |scalar| is the
// scalar, |point| is the u-coordinate of the elliptic curve
Expand Down
4 changes: 2 additions & 2 deletions third_party/s2n-bignum/x86_att/curve25519/bignum_mod_n25519.S
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ S2N_BN_SYMBOL(bignum_mod_n25519):
testq k, k
jz writeback

loop:
bignum_mod_n25519_loop:

// Assume that the new 5-digit x is 2^64 * previous_x + next_digit.
// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1)
Expand Down Expand Up @@ -183,7 +183,7 @@ loop:
movq d, m0

decq k
jnz loop
jnz bignum_mod_n25519_loop

// Write back

Expand Down
8 changes: 4 additions & 4 deletions third_party/s2n-bignum/x86_att/p521/bignum_inv_p521.S
Original file line number Diff line number Diff line change
Expand Up @@ -1095,9 +1095,9 @@ S2N_BN_SYMBOL(bignum_inv_p521):

movq $21, i
movq $1, d
jmp midloop
jmp bignum_inv_p521_midloop

loop:
bignum_inv_p521_loop:

// Separate out the matrix into sign-magnitude pairs

Expand Down Expand Up @@ -1775,15 +1775,15 @@ loop:
adcq %rax, %rbp
movq %rbp, V+8*N(%rsp)

midloop:
bignum_inv_p521_midloop:

divstep59(d,ff,gg)
movq %rsi, d

// Next iteration

decq i
jnz loop
jnz bignum_inv_p521_loop

// The 21st and last iteration does not need anything except the
// u value and the sign of f; the latter can be obtained from the
Expand Down

0 comments on commit 51ae4b1

Please sign in to comment.