Skip to content

Commit

Permalink
[EC] ec_nistp P-256 C scalar_mul_{base|public} (#2033)
Browse files Browse the repository at this point in the history
Use `ec_nistp` implementation for P-256 C code.
Update `make_tables.go` to generate the base point
table in the same way as for P-384 and P-521.
  • Loading branch information
dkostic authored Dec 23, 2024
1 parent 445813a commit acf5220
Show file tree
Hide file tree
Showing 4 changed files with 1,699 additions and 504 deletions.
4 changes: 2 additions & 2 deletions crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
// |----------------------------|
// | 1. | x | x | x* |
// | 2. | x | x | x* |
// | 3. | x | x | |
// | 3. | x | x | x* |
// | 4. | x | x | x* |
// | 5. | x | x | |
// | 5. | x | x | x* |
// * For P-256, only the Fiat-crypto implementation in p256.c is replaced.

#include "ec_nistp.h"
Expand Down
76 changes: 21 additions & 55 deletions crypto/fipsmodule/ec/make_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,16 @@ static const alignas(4096) PRECOMP256_ROW ecp_nistz256_precomputed[37] = `
}

func writeP256Table(path string) error {

win_size := 5 // window size for the comb multiplication
pts_per_subtable := (1 << win_size) >> 1 // we keep only the odd multiples
num_subtables := int(math.Ceil(float64(256) / float64(win_size * 4))) // we use comb mul with step 4

curve := elliptic.P256()
tables := [][][2]*big.Int{
makeComb(curve, 64, 4, 0),
makeComb(curve, 64, 4, 32),
tables := make([][][2]*big.Int, 0, num_subtables)
for i := 0; i < num_subtables; i += 1 {
row := makeOddMultiples(curve, pts_per_subtable, i*win_size*4)
tables = append(tables, row)
}

f, err := os.Create(path)
Expand All @@ -272,67 +278,26 @@ func writeP256Table(path string) error {
defer f.Close()
w := &columnWriter{w: f}

const fileHeader = `/* Copyright (c) 2020, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
const fileHeader = `/*
------------------------------------------------------------------------------------
Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0 OR ISC
------------------------------------------------------------------------------------
*/
// This file is generated by make_tables.go.
// Base point pre computation
// --------------------------
//
// Two different sorts of precomputed tables are used in the following code.
// Each contain various points on the curve, where each point is three field
// elements (x, y, z).
//
// For the base point table, z is usually 1 (0 for the point at infinity).
// This table has 2 * 16 elements, starting with the following:
// index | bits | point
// ------+---------+------------------------------
// 0 | 0 0 0 0 | 0G
// 1 | 0 0 0 1 | 1G
// 2 | 0 0 1 0 | 2^64G
// 3 | 0 0 1 1 | (2^64 + 1)G
// 4 | 0 1 0 0 | 2^128G
// 5 | 0 1 0 1 | (2^128 + 1)G
// 6 | 0 1 1 0 | (2^128 + 2^64)G
// 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
// 8 | 1 0 0 0 | 2^192G
// 9 | 1 0 0 1 | (2^192 + 1)G
// 10 | 1 0 1 0 | (2^192 + 2^64)G
// 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
// 12 | 1 1 0 0 | (2^192 + 2^128)G
// 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
// 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
// 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
// followed by a copy of this with each element multiplied by 2^32.
//
// The reason for this is so that we can clock bits into four different
// locations when doing simple scalar multiplies against the base point,
// and then another four locations using the second 16 elements.
//
// Tables for other points have table[i] = iG for i in 0 .. 16.
#if defined(EC_NISTP_USE_64BIT_LIMB)`

// fiat_p256_g_pre_comp is the table of precomputed base points
#if defined(BORINGSSL_NISTP256_64BIT)
static const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = `
if _, err := io.WriteString(w, fileHeader); err != nil {
table_def_str := fmt.Sprintf("static const fiat_p256_felem fiat_p256_g_pre_comp[%d][%d][2] = ", num_subtables, pts_per_subtable)

if _, err := io.WriteString(w, fileHeader + "\n" + table_def_str); err != nil {
return err
}
if err := writeTables(w, curve, tables, writeU64Mont, nil); err != nil {
return err
}
if _, err := io.WriteString(w, ";\n#else\nstatic const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = "); err != nil {
if _, err := io.WriteString(w, ";\n#else\n" + table_def_str); err != nil {
return err
}
if err := writeTables(w, curve, tables, writeU32Mont, nil); err != nil {
Expand All @@ -345,6 +310,7 @@ static const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = `
return nil
}


func writeP384Table(path string) error {

win_size := 5 // window size for the comb multiplication
Expand Down
186 changes: 17 additions & 169 deletions crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,6 @@ static fiat_p256_limb_t fiat_p256_nz(
return ret;
}

static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
out[i] = in1[i];
}
}

static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
fiat_p256_limb_t t,
const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
fiat_p256_selectznz(out, !!t, z, nz);
}

static void fiat_p256_from_words(fiat_p256_felem out,
const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
// Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
Expand Down Expand Up @@ -186,6 +172,8 @@ static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
ec_nistp_point_add(p256_methods(), x3, y3, z3, x1, y1, z1, mixed, x2, y2, z2);
}

#include "./p256_table.h"

DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) {
out->felem_num_limbs = FIAT_P256_NLIMBS;
out->felem_num_bits = 256;
Expand All @@ -195,39 +183,10 @@ DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) {
out->felem_sqr = fiat_p256_square;
out->felem_neg = fiat_p256_opp;
out->felem_nz = fiat_p256_nz;
out->felem_one = fiat_p256_one;
out->point_dbl = fiat_p256_point_double;
out->point_add = fiat_p256_point_add;
}

#include "./p256_table.h"

// fiat_p256_select_point_affine selects the |idx-1|th point from a
// precomputation table and copies it to out. If |idx| is zero, the output is
// the point at infinity.
static void fiat_p256_select_point_affine(
const fiat_p256_limb_t idx, size_t size,
const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
for (size_t i = 0; i < size; i++) {
fiat_p256_limb_t mismatch = i ^ (idx - 1);
fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
}
fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
}

// fiat_p256_get_bit returns the |i|th bit in |in|.
static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
if (i < 0 || i >= 256) {
return 0;
}
#if defined(OPENSSL_64_BIT)
assert(sizeof(BN_ULONG) == 8);
return (in->words[i >> 6] >> (i & 63)) & 1;
#else
assert(sizeof(BN_ULONG) == 4);
return (in->words[i >> 5] >> (i & 31)) & 1;
#endif
out->scalar_mul_base_table = (const ec_nistp_felem_limb*) fiat_p256_g_pre_comp;
}

// OPENSSL EC_METHOD FUNCTIONS
Expand Down Expand Up @@ -312,141 +271,30 @@ static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
EC_JACOBIAN *r,
const EC_SCALAR *scalar) {
// Set nq to the point at infinity.
fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3];
fiat_p256_felem res[3];

int skip = 1; // Save two point operations in the first round.
for (size_t i = 31; i < 32; i--) {
if (!skip) {
fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
}

// First, look 32 bits upwards.
crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
bits |= fiat_p256_get_bit(scalar, i + 32);
// Select the point to add, in constant time.
fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
fiat_p256_g_pre_comp[1], tmp);

if (!skip) {
fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
1 /* mixed */, tmp[0], tmp[1], tmp[2]);
} else {
fiat_p256_copy(nq[0], tmp[0]);
fiat_p256_copy(nq[1], tmp[1]);
fiat_p256_copy(nq[2], tmp[2]);
skip = 0;
}
ec_nistp_scalar_mul_base(p256_methods(), res[0], res[1], res[2], scalar);

// Second, look at the current position.
bits = fiat_p256_get_bit(scalar, i + 192) << 3;
bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
bits |= fiat_p256_get_bit(scalar, i);
// Select the point to add, in constant time.
fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
fiat_p256_g_pre_comp[0], tmp);
fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
tmp[0], tmp[1], tmp[2]);
}

fiat_p256_to_generic(&r->X, nq[0]);
fiat_p256_to_generic(&r->Y, nq[1]);
fiat_p256_to_generic(&r->Z, nq[2]);
fiat_p256_to_generic(&r->X, res[0]);
fiat_p256_to_generic(&r->Y, res[1]);
fiat_p256_to_generic(&r->Z, res[2]);
}

static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
EC_JACOBIAN *r,
const EC_SCALAR *g_scalar,
const EC_JACOBIAN *p,
const EC_SCALAR *p_scalar) {
#define P256_WSIZE_PUBLIC 4
// Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3];
fiat_p256_from_generic(p_pre_comp[0][0], &p->X);
fiat_p256_from_generic(p_pre_comp[0][1], &p->Y);
fiat_p256_from_generic(p_pre_comp[0][2], &p->Z);
fiat_p256_felem p2[3];
fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0],
p_pre_comp[0][1], p_pre_comp[0][2]);
for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1],
p2[2]);
}

// Set up the coefficients for |p_scalar|.
int8_t p_wNAF[257];
ec_compute_wNAF(p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

// Set |ret| to the point at infinity.
int skip = 1; // Save some point operations.
fiat_p256_felem ret[3] = {{0}, {0}, {0}};
for (int i = 256; i >= 0; i--) {
if (!skip) {
fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
}

// For the |g_scalar|, we use the precomputed table without the
// constant-time lookup.
if (i <= 31) {
// First, look 32 bits upwards.
crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
bits |= fiat_p256_get_bit(g_scalar, i + 32);
if (bits != 0) {
size_t index = (size_t)(bits - 1);
fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
fiat_p256_g_pre_comp[1][index][1],
fiat_p256_one);
skip = 0;
}

// Second, look at the current position.
bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
bits |= fiat_p256_get_bit(g_scalar, i);
if (bits != 0) {
size_t index = (size_t)(bits - 1);
fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
fiat_p256_g_pre_comp[0][index][1],
fiat_p256_one);
skip = 0;
}
}
fiat_p256_felem res[3], tmp[3];
fiat_p256_from_generic(tmp[0], &p->X);
fiat_p256_from_generic(tmp[1], &p->Y);
fiat_p256_from_generic(tmp[2], &p->Z);

int digit = p_wNAF[i];
if (digit != 0) {
assert(digit & 1);
size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
if (digit < 0) {
fiat_p256_opp(tmp, p_pre_comp[idx][1]);
y = &tmp;
}
if (!skip) {
fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
0 /* not mixed */, p_pre_comp[idx][0], *y,
p_pre_comp[idx][2]);
} else {
fiat_p256_copy(ret[0], p_pre_comp[idx][0]);
fiat_p256_copy(ret[1], *y);
fiat_p256_copy(ret[2], p_pre_comp[idx][2]);
skip = 0;
}
}
}
ec_nistp_scalar_mul_public(p256_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);

fiat_p256_to_generic(&r->X, ret[0]);
fiat_p256_to_generic(&r->Y, ret[1]);
fiat_p256_to_generic(&r->Z, ret[2]);
fiat_p256_to_generic(&r->X, res[0]);
fiat_p256_to_generic(&r->Y, res[1]);
fiat_p256_to_generic(&r->Z, res[2]);
}

static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
Expand Down
Loading

0 comments on commit acf5220

Please sign in to comment.