[EC] ec_nistp P-256 C scalar_mul_{base|public} (#2033)

Use `ec_nistp` implementation for P-256 C code. Update `make_tables.go` to generate the base point table in the same way as for P-384 and P-521.
aws · Dec 23, 2024 · acf5220 · acf5220
1 parent 445813a
commit acf5220
Show file tree

Hide file tree

Showing 4 changed files with 1,699 additions and 504 deletions.
diff --git a/crypto/fipsmodule/ec/ec_nistp.c b/crypto/fipsmodule/ec/ec_nistp.c
@@ -17,9 +17,9 @@
 // |----------------------------|
 // | 1. |   x   |   x   |   x*  |
 // | 2. |   x   |   x   |   x*  |
-// | 3. |   x   |   x   |       |
+// | 3. |   x   |   x   |   x*  |
 // | 4. |   x   |   x   |   x*  |
-// | 5. |   x   |   x   |       |
+// | 5. |   x   |   x   |   x*  |
 //  * For P-256, only the Fiat-crypto implementation in p256.c is replaced. 
 
 #include "ec_nistp.h"

diff --git a/crypto/fipsmodule/ec/make_tables.go b/crypto/fipsmodule/ec/make_tables.go
@@ -259,10 +259,16 @@ static const alignas(4096) PRECOMP256_ROW ecp_nistz256_precomputed[37] = `
 }
 
 func writeP256Table(path string) error {
+
+	win_size := 5 // window size for the comb multiplication
+	pts_per_subtable := (1 << win_size) >> 1 // we keep only the odd multiples
+	num_subtables := int(math.Ceil(float64(256) / float64(win_size * 4))) // we use comb mul with step 4
+
 	curve := elliptic.P256()
-	tables := [][][2]*big.Int{
-		makeComb(curve, 64, 4, 0),
-		makeComb(curve, 64, 4, 32),
+	tables := make([][][2]*big.Int, 0, num_subtables)
+	for i := 0; i < num_subtables; i += 1 {
+		row := makeOddMultiples(curve, pts_per_subtable, i*win_size*4)
+		tables = append(tables, row)
 	}
 
 	f, err := os.Create(path)
@@ -272,67 +278,26 @@ func writeP256Table(path string) error {
 	defer f.Close()
 	w := &columnWriter{w: f}
 
-	const fileHeader = `/* Copyright (c) 2020, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+	const fileHeader = `/*
+------------------------------------------------------------------------------------
+ Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ SPDX-License-Identifier: Apache-2.0 OR ISC
+------------------------------------------------------------------------------------
+*/
 
 // This file is generated by make_tables.go.
 
-// Base point pre computation
-// --------------------------
-//
-// Two different sorts of precomputed tables are used in the following code.
-// Each contain various points on the curve, where each point is three field
-// elements (x, y, z).
-//
-// For the base point table, z is usually 1 (0 for the point at infinity).
-// This table has 2 * 16 elements, starting with the following:
-// index | bits    | point
-// ------+---------+------------------------------
-//     0 | 0 0 0 0 | 0G
-//     1 | 0 0 0 1 | 1G
-//     2 | 0 0 1 0 | 2^64G
-//     3 | 0 0 1 1 | (2^64 + 1)G
-//     4 | 0 1 0 0 | 2^128G
-//     5 | 0 1 0 1 | (2^128 + 1)G
-//     6 | 0 1 1 0 | (2^128 + 2^64)G
-//     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
-//     8 | 1 0 0 0 | 2^192G
-//     9 | 1 0 0 1 | (2^192 + 1)G
-//    10 | 1 0 1 0 | (2^192 + 2^64)G
-//    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
-//    12 | 1 1 0 0 | (2^192 + 2^128)G
-//    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
-//    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
-//    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
-// followed by a copy of this with each element multiplied by 2^32.
-//
-// The reason for this is so that we can clock bits into four different
-// locations when doing simple scalar multiplies against the base point,
-// and then another four locations using the second 16 elements.
-//
-// Tables for other points have table[i] = iG for i in 0 .. 16.
+#if defined(EC_NISTP_USE_64BIT_LIMB)`
 
-// fiat_p256_g_pre_comp is the table of precomputed base points
-#if defined(BORINGSSL_NISTP256_64BIT)
-static const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = `
-	if _, err := io.WriteString(w, fileHeader); err != nil {
+	table_def_str := fmt.Sprintf("static const fiat_p256_felem fiat_p256_g_pre_comp[%d][%d][2] = ", num_subtables, pts_per_subtable)
+
+	if _, err := io.WriteString(w, fileHeader + "\n" + table_def_str); err != nil {
 		return err
 	}
 	if err := writeTables(w, curve, tables, writeU64Mont, nil); err != nil {
 		return err
 	}
-	if _, err := io.WriteString(w, ";\n#else\nstatic const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = "); err != nil {
+	if _, err := io.WriteString(w, ";\n#else\n" + table_def_str); err != nil {
 		return err
 	}
 	if err := writeTables(w, curve, tables, writeU32Mont, nil); err != nil {
@@ -345,6 +310,7 @@ static const fiat_p256_felem fiat_p256_g_pre_comp[2][15][2] = `
 	return nil
 }
 
+
 func writeP384Table(path string) error {
 
 	win_size := 5 // window size for the comb multiplication

diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c
@@ -64,20 +64,6 @@ static fiat_p256_limb_t fiat_p256_nz(
   return ret;
 }
 
-static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
-                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
-  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
-    out[i] = in1[i];
-  }
-}
-
-static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
-                              fiat_p256_limb_t t,
-                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
-                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
-  fiat_p256_selectznz(out, !!t, z, nz);
-}
-
 static void fiat_p256_from_words(fiat_p256_felem out,
                                  const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
   // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
@@ -186,6 +172,8 @@ static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
   ec_nistp_point_add(p256_methods(), x3, y3, z3, x1, y1, z1, mixed, x2, y2, z2);
 }
 
+#include "./p256_table.h"
+
 DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) {
     out->felem_num_limbs = FIAT_P256_NLIMBS;
     out->felem_num_bits = 256;
@@ -195,39 +183,10 @@ DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) {
     out->felem_sqr = fiat_p256_square;
     out->felem_neg = fiat_p256_opp;
     out->felem_nz  = fiat_p256_nz;
+    out->felem_one = fiat_p256_one;
     out->point_dbl = fiat_p256_point_double;
     out->point_add = fiat_p256_point_add;
-}
-
-#include "./p256_table.h"
-
-// fiat_p256_select_point_affine selects the |idx-1|th point from a
-// precomputation table and copies it to out. If |idx| is zero, the output is
-// the point at infinity.
-static void fiat_p256_select_point_affine(
-    const fiat_p256_limb_t idx, size_t size,
-    const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
-  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
-  for (size_t i = 0; i < size; i++) {
-    fiat_p256_limb_t mismatch = i ^ (idx - 1);
-    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
-    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
-  }
-  fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
-}
-
-// fiat_p256_get_bit returns the |i|th bit in |in|.
-static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
-  if (i < 0 || i >= 256) {
-    return 0;
-  }
-#if defined(OPENSSL_64_BIT)
-  assert(sizeof(BN_ULONG) == 8);
-  return (in->words[i >> 6] >> (i & 63)) & 1;
-#else
-  assert(sizeof(BN_ULONG) == 4);
-  return (in->words[i >> 5] >> (i & 31)) & 1;
-#endif
+    out->scalar_mul_base_table = (const ec_nistp_felem_limb*) fiat_p256_g_pre_comp;
 }
 
 // OPENSSL EC_METHOD FUNCTIONS
@@ -312,141 +271,30 @@ static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
 static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
                                            EC_JACOBIAN *r,
                                            const EC_SCALAR *scalar) {
-  // Set nq to the point at infinity.
-  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3];
+  fiat_p256_felem res[3];
 
-  int skip = 1;  // Save two point operations in the first round.
-  for (size_t i = 31; i < 32; i--) {
-    if (!skip) {
-      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
-    }
-
-    // First, look 32 bits upwards.
-    crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
-    bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
-    bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
-    bits |= fiat_p256_get_bit(scalar, i + 32);
-    // Select the point to add, in constant time.
-    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
-                                  fiat_p256_g_pre_comp[1], tmp);
-
-    if (!skip) {
-      fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
-                          1 /* mixed */, tmp[0], tmp[1], tmp[2]);
-    } else {
-      fiat_p256_copy(nq[0], tmp[0]);
-      fiat_p256_copy(nq[1], tmp[1]);
-      fiat_p256_copy(nq[2], tmp[2]);
-      skip = 0;
-    }
+  ec_nistp_scalar_mul_base(p256_methods(), res[0], res[1], res[2], scalar);
 
-    // Second, look at the current position.
-    bits = fiat_p256_get_bit(scalar, i + 192) << 3;
-    bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
-    bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
-    bits |= fiat_p256_get_bit(scalar, i);
-    // Select the point to add, in constant time.
-    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
-                                  fiat_p256_g_pre_comp[0], tmp);
-    fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                        tmp[0], tmp[1], tmp[2]);
-  }
-
-  fiat_p256_to_generic(&r->X, nq[0]);
-  fiat_p256_to_generic(&r->Y, nq[1]);
-  fiat_p256_to_generic(&r->Z, nq[2]);
+  fiat_p256_to_generic(&r->X, res[0]);
+  fiat_p256_to_generic(&r->Y, res[1]);
+  fiat_p256_to_generic(&r->Z, res[2]);
 }
 
 static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
                                              EC_JACOBIAN *r,
                                              const EC_SCALAR *g_scalar,
                                              const EC_JACOBIAN *p,
                                              const EC_SCALAR *p_scalar) {
-#define P256_WSIZE_PUBLIC 4
-  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
-  fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3];
-  fiat_p256_from_generic(p_pre_comp[0][0], &p->X);
-  fiat_p256_from_generic(p_pre_comp[0][1], &p->Y);
-  fiat_p256_from_generic(p_pre_comp[0][2], &p->Z);
-  fiat_p256_felem p2[3];
-  fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0],
-                         p_pre_comp[0][1], p_pre_comp[0][2]);
-  for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
-    fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
-                        p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
-                        p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1],
-                        p2[2]);
-  }
-
-  // Set up the coefficients for |p_scalar|.
-  int8_t p_wNAF[257];
-  ec_compute_wNAF(p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
-
-  // Set |ret| to the point at infinity.
-  int skip = 1;  // Save some point operations.
-  fiat_p256_felem ret[3] = {{0}, {0}, {0}};
-  for (int i = 256; i >= 0; i--) {
-    if (!skip) {
-      fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
-    }
-
-    // For the |g_scalar|, we use the precomputed table without the
-    // constant-time lookup.
-    if (i <= 31) {
-      // First, look 32 bits upwards.
-      crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
-      bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
-      bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
-      bits |= fiat_p256_get_bit(g_scalar, i + 32);
-      if (bits != 0) {
-        size_t index = (size_t)(bits - 1);
-        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
-                            fiat_p256_g_pre_comp[1][index][1],
-                            fiat_p256_one);
-        skip = 0;
-      }
-
-      // Second, look at the current position.
-      bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
-      bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
-      bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
-      bits |= fiat_p256_get_bit(g_scalar, i);
-      if (bits != 0) {
-        size_t index = (size_t)(bits - 1);
-        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
-                            fiat_p256_g_pre_comp[0][index][1],
-                            fiat_p256_one);
-        skip = 0;
-      }
-    }
+  fiat_p256_felem res[3], tmp[3];
+  fiat_p256_from_generic(tmp[0], &p->X);
+  fiat_p256_from_generic(tmp[1], &p->Y);
+  fiat_p256_from_generic(tmp[2], &p->Z);
 
-    int digit = p_wNAF[i];
-    if (digit != 0) {
-      assert(digit & 1);
-      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
-      fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
-      if (digit < 0) {
-        fiat_p256_opp(tmp, p_pre_comp[idx][1]);
-        y = &tmp;
-      }
-      if (!skip) {
-        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            0 /* not mixed */, p_pre_comp[idx][0], *y,
-                            p_pre_comp[idx][2]);
-      } else {
-        fiat_p256_copy(ret[0], p_pre_comp[idx][0]);
-        fiat_p256_copy(ret[1], *y);
-        fiat_p256_copy(ret[2], p_pre_comp[idx][2]);
-        skip = 0;
-      }
-    }
-  }
+  ec_nistp_scalar_mul_public(p256_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);
 
-  fiat_p256_to_generic(&r->X, ret[0]);
-  fiat_p256_to_generic(&r->Y, ret[1]);
-  fiat_p256_to_generic(&r->Z, ret[2]);
+  fiat_p256_to_generic(&r->X, res[0]);
+  fiat_p256_to_generic(&r->Y, res[1]);
+  fiat_p256_to_generic(&r->Z, res[2]);
 }
 
 static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,