驳嗦 发表于 2025-9-25 21:07:43

secp256k1算法详解二(关键理论及源码分析)

1 关键结构体

1.1 secp256k1_fe

secp256k1库域元素field element,其具体定义如下
/** This field implementation represents the value as 10 uint32_t limbs in base
*2^26. */
typedef struct {
   /* A field element f represents the sum(i=0..9, f.n << (i*26)) mod p,
    * where p is the field modulus, 2^256 - 2^32 - 977.
    *
    * The individual limbs f.n can exceed 2^26; the field's magnitude roughly
    * corresponds to how much excess is allowed. The value
    * sum(i=0..9, f.n << (i*26)) may exceed p, unless the field element is
    * normalized. */
    uint32_t n;
    /*
   * Magnitude m requires:
   *   n <= 2 * m * (2^26 - 1) for i=0..8
   *   n <= 2 * m * (2^22 - 1)
   *
   * Normalized requires:
   *   n <= (2^26 - 1) for i=0..8
   *   sum(i=0..9, n << (i*26)) < p
   *   (together these imply n <= 2^22 - 1)
   */
    SECP256K1_FE_VERIFY_FIELDS
} secp256k1_fe;域的模数p = 2256 - 232 - 977,域中大数r被分为10个limbs,其标准表示为每个limb是26-bit数(除了最后一个是22-bit数),即:

如之前所描述,由于magnitude的存在,在运算过程中,会使某些limbs超过26-bit,导致进位错误。归一化目标:1 确保最终值 < p; 2 所有limbs小于226(除了t9,小于222)。
在函数中共进行了两次模约减操作,首轮模约减主要目的是去除高位t9的22-bit之后多余部分:
/** Maximum allowed magnitudes for group element coordinates
*in affine (x, y) and jacobian (x, y, z) representation. */
#define SECP256K1_GE_X_MAGNITUDE_MAX4
#define SECP256K1_GE_Y_MAGNITUDE_MAX3
#define SECP256K1_GEJ_X_MAGNITUDE_MAX 4
#define SECP256K1_GEJ_Y_MAGNITUDE_MAX 4
#define SECP256K1_GEJ_Z_MAGNITUDE_MAX 1
这里的x值为r/2256向下取整,由SECP256K1_GE_X_MAGNITUDE_MAX等宏定义可知x最大为4-bit数,因为2256 ≡ 232 + 977 mod p,所以超出2256部分即为 x*2256 ≡ x*(232 + 977) mod p ≡ x*(1 n, t2 = r->n, t3 = r->n, t4 = r->n,             t5 = r->n, t6 = r->n, t7 = r->n, t8 = r->n, t9 = r->n;    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */    uint32_t z0, z1;    /* Reduce t9 at the start so there will be at most a single carry from the first pass */    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;    /* The first pass ensures the magnitude is 1, ... */    t0 += x * 0x3D1UL; t1 += (x > 26); t0 &= 0x3FFFFFFUL; z0= t0; z1= t0 ^ 0x3D0UL;    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;                                       z0 |= t9; z1 &= t9 ^ 0x3C00000UL;    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */    return (z0 == 0) | (z1 == 0x3FFFFFFUL);}secp256k1_fe_normalize_to_zero该函数在不完全归一化(normalize)整个字段元素的情况下,判断其值是否为零或等于模数,代码中z0用于检测是否所有位都为 0,z1用于检测是否所有位构成了模数p。类似secp256k1_fe_normalizes_to_zero_var函数是secp256k1_fe_normalizes_to_zero的变体(var)版,其功能与前者一致:判断一个字段元素是否为0或模数p(即归一化后是否为 0),但使用了更快路径和懒惰归一化以提高性能,该函数的关键代码如下:
/** A group element in affine coordinates on the secp256k1 curve,
*or occasionally on an isomorphic curve of the form y^2 = x^3 + 7*t^6.
*Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
*/
typedef struct {
    secp256k1_fe x;
    secp256k1_fe y;
    int infinity; /* whether this represents the point at infinity */
} secp256k1_ge;若t9有进位(超过 22 位),说明有“≥2256”成分,我们提取出这部分(乘以 0x3D1 并加回 t0),这是归一化关键。然后立即对 t0 做一次判断:z0 == 0 表示可能是 0,z1 == 0x3FFFFFFUL 表示可能是 P(因为 t0 应该是 0x3D0),如果两个都不满足,直接返回 0,这是个非常快的判断分支。其他部分代码就和secp256k1_fe_normalizes_to_zero一致了。
3.3 secp256k1_fe_mul_inner

该函数是大数乘法的内部实现,libsecp256k1的作者Pieter Wuille的设计理念可以总结为:"所有公共API都要求输入是完全归一化的,但内部函数可以处理非归一化形式。中间值的大小必须受控,但可以比26位大(比如在该函数内部被限制在30bit),为的是避免频繁的carry"。以下是完整函数
1 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {2   uint64_t c, d;3   uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;4   uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;5   const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;6   7   VERIFY_BITS(a, 30);8   VERIFY_BITS(a, 30);9   VERIFY_BITS(a, 30); 10   VERIFY_BITS(a, 30); 11   VERIFY_BITS(a, 30); 12   VERIFY_BITS(a, 30); 13   VERIFY_BITS(a, 30); 14   VERIFY_BITS(a, 30); 15   VERIFY_BITS(a, 30); 16   VERIFY_BITS(a, 26); 17   VERIFY_BITS(b, 30); 18   VERIFY_BITS(b, 30); 19   VERIFY_BITS(b, 30); 20   VERIFY_BITS(b, 30); 21   VERIFY_BITS(b, 30); 22   VERIFY_BITS(b, 30); 23   VERIFY_BITS(b, 30); 24   VERIFY_BITS(b, 30); 25   VERIFY_BITS(b, 30); 26   VERIFY_BITS(b, 26); 2728   /** [... a b c] is a shorthand for ... + a= 26; c += u1 * R1; 96   VERIFY_BITS(t1, 26); 97   VERIFY_BITS(c, 38); 98   /* = */ 99   /* = */100 101   c += (uint64_t)a * b102      + (uint64_t)a * b103      + (uint64_t)a * b;104   VERIFY_BITS(c, 62);105   /* = */106   d += (uint64_t)a * b107      + (uint64_t)a * b108      + (uint64_t)a * b109      + (uint64_t)a * b110      + (uint64_t)a * b111      + (uint64_t)a * b112      + (uint64_t)a * b;113   VERIFY_BITS(d, 63);114   /* = */115   u2 = d & M; d >>= 26; c += u2 * R0;116   VERIFY_BITS(u2, 26);117   VERIFY_BITS(d, 37);118   VERIFY_BITS(c, 63);119   /* = */120   t2 = c & M; c >>= 26; c += u2 * R1;121   VERIFY_BITS(t2, 26);122   VERIFY_BITS(c, 38);123   /* = */124   /* = */125 126   c += (uint64_t)a * b127      + (uint64_t)a * b128      + (uint64_t)a * b129      + (uint64_t)a * b;130   VERIFY_BITS(c, 63);131   /* = */132   d += (uint64_t)a * b133      + (uint64_t)a * b134      + (uint64_t)a * b135      + (uint64_t)a * b136      + (uint64_t)a * b137      + (uint64_t)a * b;138   VERIFY_BITS(d, 63);139   /* = */140   u3 = d & M; d >>= 26; c += u3 * R0;141   VERIFY_BITS(u3, 26);142   VERIFY_BITS(d, 37);143   /* VERIFY_BITS(c, 64); */144   /* = */145   t3 = c & M; c >>= 26; c += u3 * R1;146   VERIFY_BITS(t3, 26);147   VERIFY_BITS(c, 39);148   /* = */149   /* = */150 151   c += (uint64_t)a * b152      + (uint64_t)a * b153      + (uint64_t)a * b154      + (uint64_t)a * b155      + (uint64_t)a * b;156   VERIFY_BITS(c, 63);157   /* = */158   d += (uint64_t)a * b159      + (uint64_t)a * b160      + (uint64_t)a * b161      + (uint64_t)a * b162      + (uint64_t)a * b;163   VERIFY_BITS(d, 62);164   /* = */165   u4 = d & M; d >>= 26; c += u4 * R0;166   VERIFY_BITS(u4, 26);167   VERIFY_BITS(d, 36);168   /* VERIFY_BITS(c, 64); */169   /* = */170   t4 = c & M; c >>= 26; c += u4 * R1;171   VERIFY_BITS(t4, 26);172   VERIFY_BITS(c, 39);173   /* = */174   /* = */175 176   c += (uint64_t)a * b177      + (uint64_t)a * b178      + (uint64_t)a * b179      + (uint64_t)a * b180      + (uint64_t)a * b181      + (uint64_t)a * b;182   VERIFY_BITS(c, 63);183   /* = */184   d += (uint64_t)a * b185      + (uint64_t)a * b186      + (uint64_t)a * b187      + (uint64_t)a * b;188   VERIFY_BITS(d, 62);189   /* = */190   u5 = d & M; d >>= 26; c += u5 * R0;191   VERIFY_BITS(u5, 26);192   VERIFY_BITS(d, 36);193   /* VERIFY_BITS(c, 64); */194   /* = */195   t5 = c & M; c >>= 26; c += u5 * R1;196   VERIFY_BITS(t5, 26);197   VERIFY_BITS(c, 39);198   /* = */199   /* = */200 201   c += (uint64_t)a * b202      + (uint64_t)a * b203      + (uint64_t)a * b204      + (uint64_t)a * b205      + (uint64_t)a * b206      + (uint64_t)a * b207      + (uint64_t)a * b;208   VERIFY_BITS(c, 63);209   /* = */210   d += (uint64_t)a * b211      + (uint64_t)a * b212      + (uint64_t)a * b;213   VERIFY_BITS(d, 61);214   /* = */215   u6 = d & M; d >>= 26; c += u6 * R0;216   VERIFY_BITS(u6, 26);217   VERIFY_BITS(d, 35);218   /* VERIFY_BITS(c, 64); */219   /* = */220   t6 = c & M; c >>= 26; c += u6 * R1;221   VERIFY_BITS(t6, 26);222   VERIFY_BITS(c, 39);223   /* = */224   /* = */225 226   c += (uint64_t)a * b227      + (uint64_t)a * b228      + (uint64_t)a * b229      + (uint64_t)a * b230      + (uint64_t)a * b231      + (uint64_t)a * b232      + (uint64_t)a * b233      + (uint64_t)a * b;234   /* VERIFY_BITS(c, 64); */235   VERIFY_CHECK(c >= 26; c += u7 * R0;242   VERIFY_BITS(u7, 26);243   VERIFY_BITS(d, 32);244   /* VERIFY_BITS(c, 64); */245   VERIFY_CHECK(c >= 26; c += u7 * R1;248   VERIFY_BITS(t7, 26);249   VERIFY_BITS(c, 38);250   /* = */251   /* = */252 253   c += (uint64_t)a * b254      + (uint64_t)a * b255      + (uint64_t)a * b256      + (uint64_t)a * b257      + (uint64_t)a * b258      + (uint64_t)a * b259      + (uint64_t)a * b260      + (uint64_t)a * b261      + (uint64_t)a * b;262   /* VERIFY_BITS(c, 64); */263   VERIFY_CHECK(c >= 26; c += u8 * R0;269   VERIFY_BITS(u8, 26);270   VERIFY_BITS(d, 31);271   /* VERIFY_BITS(c, 64); */272   VERIFY_CHECK(c >= 26; c += u8 * R1;292   VERIFY_BITS(r, 26);293   VERIFY_BITS(c, 39);294   /* = */295   /* = */296   c   += d * R0 + t9;297   VERIFY_BITS(c, 45);298   /* = */299   r = c & (M >> 4); c >>= 22; c += d * (R1 = 26;319   VERIFY_BITS(r, 26);320   VERIFY_BITS(d, 27);321   VERIFY_CHECK(d
页: [1]
查看完整版本: secp256k1算法详解二(关键理论及源码分析)