Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(447)

Delta Between Two Patch Sets: src/pkg/crypto/elliptic/p256.go

Issue 10551044: code review 10551044: crypto/elliptic: add constant-time, P-256 implementation. (Closed)
Left Patch Set: Created 10 years, 9 months ago
Right Patch Set: diff -r 4fe0404effd1 https://code.google.com/p/go/ Created 10 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/crypto/elliptic/elliptic_test.go ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
(no file at all)
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package elliptic
6
7 // This file contains a constant-time, 32-bit implementation of P256.
8
9 import (
10 "math/big"
11 )
12
13 type p256Curve struct {
14 *CurveParams
15 }
16
17 var (
18 p256 p256Curve
19 // RInverse contains 1/R mod p - the inverse of the Montgomery constant
20 // (2**257).
21 p256RInverse *big.Int
22 )
23
24 func initP256() {
25 // See FIPS 186-3, section D.2.3
26 p256.CurveParams = new(CurveParams)
27 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573 530086143415290314195533631308867097853951", 10)
28 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573 529996955224135760342422259061068512044369", 10)
29 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d 06b0cc53b0f63bce3c3e27d2604b", 16)
30 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f2770 37d812deb33a0f4a13945d898c296", 16)
31 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bc e33576b315ececbb6406837bf51f5", 16)
32 p256.BitSize = 256
33
34 p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe800000 0100000000ffffffff0000000180000000", 16)
35 }
36
37 func (curve p256Curve) Params() *CurveParams {
38 return curve.CurveParams
39 }
40
41 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
42 // to out. If the scalar is equal or greater than the order of the group, it's
43 // reduced modulo that order.
44 func p256GetScalar(out *[32]byte, in []byte) {
45 n := new(big.Int).SetBytes(in)
46 var scalarBytes []byte
47
48 if n.Cmp(p256.N) >= 0 {
49 n.Mod(n, p256.N)
50 scalarBytes = n.Bytes()
51 } else {
52 scalarBytes = in
53 }
54
55 for i, v := range scalarBytes {
56 out[len(scalarBytes)-(1+i)] = v
57 }
58 }
59
60 func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
61 var scalarReversed [32]byte
62 p256GetScalar(&scalarReversed, scalar)
63
64 var x1, y1, z1 [p256Limbs]uint32
65 p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed)
66 return p256ToAffine(&x1, &y1, &z1)
67 }
68
69 func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
70 var scalarReversed [32]byte
71 p256GetScalar(&scalarReversed, scalar)
72
73 var px, py, x1, y1, z1 [p256Limbs]uint32
74 p256FromBig(&px, bigX)
75 p256FromBig(&py, bigY)
76 p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed)
77 return p256ToAffine(&x1, &y1, &z1)
78 }
79
80 // Field elements are represented as nine, unsigned 32-bit words.
81 //
82 // The value of an field element is:
83 // x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
84 //
85 // That is, each limb is alternately 29 or 28-bits wide in little-endian
86 // order.
87 //
88 // This means that a field element hits 2**257, rather than 2**256 as we would
89 // like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes
90 // problems when multiplying as terms end up one bit short of a limb which
91 // would require much bit-shifting to correct.
92 //
93 // Finally, the values stored in a field element are in Montgomery form. So the
94 // value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is
95 // 2**257.
96
97 const (
98 p256Limbs = 9
99 bottom29Bits = 0x1fffffff
100 )
101
102 var (
103 // p256One is the number 1 as a field element.
104 p256One = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0}
105 p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0}
106 // p256P is the prime modulus as a field element.
107 p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0 , 0x200000, 0xf000000, 0xfffffff}
108 // p2562P is the twice prime modulus as a field element.
109 p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff}
110 )
111
112 // p256Precomputed contains precomputed values to aid the calculation of scalar
113 // multiples of the base point, G. It's actually two, equal length, tables
114 // concatenated.
115 //
116 // The first table contains (x,y) field element pairs for 16 multiples of the
117 // base point, G.
118 //
119 // Index | Index (binary) | Value
120 // 0 | 0000 | 0G (all zeros, omitted)
121 // 1 | 0001 | G
122 // 2 | 0010 | 2**64G
123 // 3 | 0011 | 2**64G + G
124 // 4 | 0100 | 2**128G
125 // 5 | 0101 | 2**128G + G
126 // 6 | 0110 | 2**128G + 2**64G
127 // 7 | 0111 | 2**128G + 2**64G + G
128 // 8 | 1000 | 2**192G
129 // 9 | 1001 | 2**192G + G
130 // 10 | 1010 | 2**192G + 2**64G
131 // 11 | 1011 | 2**192G + 2**64G + G
132 // 12 | 1100 | 2**192G + 2**128G
133 // 13 | 1101 | 2**192G + 2**128G + G
134 // 14 | 1110 | 2**192G + 2**128G + 2**64G
135 // 15 | 1111 | 2**192G + 2**128G + 2**64G + G
136 //
137 // The second table follows the same style, but the terms are 2**32G,
138 // 2**96G, 2**160G, 2**224G.
139 //
140 // This is ~2KB of data.
141 var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
142 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x11 9e7edc, 0xd4a6eab, 0x3120bee,
143 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x1 54ba21, 0x14b10bb, 0xae3fe3,
144 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe 49073, 0x3fa36cc, 0x5ebcd2c,
145 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea 12446, 0xe1ade1e, 0xec91f22,
146 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c 7109, 0xa267a00, 0xb57c050,
147 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c 5, 0x7d6dee7, 0x2976e4b,
148 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1 d96a5a9, 0x843a649, 0xc3ab0fa,
149 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf64 3e11, 0x58c43df, 0xf423fc2,
150 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x17 2db40f, 0x83e277d, 0xb0dd609,
151 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449 e3f5, 0xe10c9e, 0x33ab581,
152 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35 df9f, 0x48764cd, 0x76dbcca,
153 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f 16b20, 0x4ba3173, 0xc168c33,
154 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322 c4c0, 0x65dd7ff, 0x3a1e4f6,
155 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x34 1f077, 0xa6add89, 0x4894acd,
156 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a29 01a, 0x69a8556, 0x7e7c0,
157 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051 825c, 0xda0cf5b, 0x812e881,
158 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe8 0c51, 0xc22be3e, 0xe35e65a,
159 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c1 84e9, 0x1c5a839, 0x47a1e26,
160 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a 90c502, 0x2f32042, 0xa17769b,
161 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10 d06a02, 0x3fc93, 0x5620023,
162 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc 513c, 0x407f75c, 0xbaab133,
163 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x104 69ea7, 0x3293ac0, 0xcdc98aa,
164 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62 f16, 0x2b6fcc7, 0xf5a4e29,
165 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x102 9f72, 0x73e1c35, 0xee70fbc,
166 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83 de85, 0x27de188, 0x66f70b8,
167 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x1 54ae914, 0x2f3ec51, 0x3826b59,
168 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061 , 0x823d9d2, 0x8213f39,
169 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x184 11a4a, 0xf5ddc3d, 0x3786689,
170 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x105 1a729, 0x4be3499, 0x52b23aa,
171 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb 048035, 0xe31de66, 0xc6ecaa3,
172 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4 a7529, 0xcb7beb1, 0xb2a78a1,
173 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1c bff658, 0xe3d6511, 0xc7d76f,
174 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc501 24c, 0x50daa90, 0xb13f72,
175 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d 32411, 0xb04a838, 0xd760d2d,
176 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c 9e11e, 0x20bca9a, 0x66f496b,
177 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x15829 68d, 0xbe985f7, 0x1acbc1a,
178 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17 fa56ff, 0x65ef930, 0x21dc4a,
179 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112 ac15f, 0x624e62e, 0xa90ae2f,
180 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x72 5522b, 0xdc78583, 0x40eeabb,
181 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x36 7ef34, 0xae2a960, 0x91b8bdc,
182 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316ab b, 0x2413c8e, 0x5425bf9,
183 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e 7633, 0x7c91952, 0xd806dce,
184 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc 4ef73, 0x8956f34, 0xe4b5cf2,
185 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf92 7ed7, 0x627b614, 0x7371cca,
186 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e 3edc9, 0x9c19bf2, 0x5882229,
187 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c 9b5b3, 0xe85ff25, 0x408ef57,
188 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa03 8113, 0xa4a1769, 0x11fbc6c,
189 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15c d60b7, 0x4acbad9, 0x5efc5fa,
190 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x1961 42cc, 0x7bf0fa9, 0x957651,
191 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7 d57, 0xf2ecaac, 0xca86dec,
192 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19 d1c12d, 0xf20bd46, 0x1951fa7,
193 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9 fc74, 0x99bb618, 0x2db944c,
194 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e 74779, 0x576138, 0x9587927,
195 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d0 7782d, 0xfc72e0b, 0x701b298,
196 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14 e2f5d8, 0xf858d3a, 0x942eea8,
197 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x198 1d7a1, 0x8395659, 0x52ed4e2,
198 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c 146c0, 0x6bdf55a, 0x4e4457d,
199 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x40 4747b, 0x878558d, 0x7d29aa4,
200 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81 d55d7, 0xa5bef68, 0xb7b30d8,
201 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f 51951, 0x9d0c177, 0x1c49a78,
202 }
203
204 // Field element operations:
205
206 // nonZeroToAllOnes returns:
207 // 0xffffffff for 0 < x <= 2**31
208 // 0 for x == 0 or x > 2**31.
209 func nonZeroToAllOnes(x uint32) uint32 {
210 return ((x - 1) >> 31) - 1
211 }
212
213 // p256ReduceCarry adds a multiple of p in order to cancel |carry|,
214 // which is a term at 2**257.
215 //
216 // On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
217 // On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
218 func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) {
219 carry_mask := nonZeroToAllOnes(carry)
220
221 inout[0] += carry << 1
222 inout[3] += 0x10000000 & carry_mask
223 // carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
224 // previous line therefore this doesn't underflow.
225 inout[3] -= carry << 11
226 inout[4] += (0x20000000 - 1) & carry_mask
227 inout[5] += (0x10000000 - 1) & carry_mask
228 inout[6] += (0x20000000 - 1) & carry_mask
229 inout[6] -= carry << 22
230 // This may underflow if carry is non-zero but, if so, we'll fix it in t he
231 // next line.
232 inout[7] -= 1 & carry_mask
233 inout[7] += carry << 25
234 }
235
236 // p256Sum sets out = in+in2.
237 //
238 // On entry, in[i]+in2[i] must not overflow a 32-bit word.
239 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
240 func p256Sum(out, in, in2 *[p256Limbs]uint32) {
241 carry := uint32(0)
242 for i := 0; ; i++ {
243 out[i] = in[i] + in2[i]
244 out[i] += carry
245 carry = out[i] >> 29
246 out[i] &= bottom29Bits
247
248 i++
249 if i == p256Limbs {
250 break
251 }
252
253 out[i] = in[i] + in2[i]
254 out[i] += carry
255 carry = out[i] >> 28
256 out[i] &= bottom28Bits
257 }
258
259 p256ReduceCarry(out, carry)
260 }
261
262 const (
263 two30m2 = 1<<30 - 1<<2
264 two30p13m2 = 1<<30 + 1<<13 - 1<<2
265 two31m2 = 1<<31 - 1<<2
266 two31p24m2 = 1<<31 + 1<<24 - 1<<2
267 two30m27m2 = 1<<30 - 1<<27 - 1<<2
268 )
269
270 // p256Zero31 is 0 mod p.
271 var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m 2, two30m2, two31p24m2, two30m27m2, two31m2}
272
273 // p256Diff sets out = in-in2.
274 //
275 // On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
276 // in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
277 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
278 func p256Diff(out, in, in2 *[p256Limbs]uint32) {
279 var carry uint32
280
281 for i := 0; ; i++ {
282 out[i] = in[i] - in2[i]
283 out[i] += p256Zero31[i]
284 out[i] += carry
285 carry = out[i] >> 29
286 out[i] &= bottom29Bits
287
288 i++
289 if i == p256Limbs {
290 break
291 }
292
293 out[i] = in[i] - in2[i]
294 out[i] += p256Zero31[i]
295 out[i] += carry
296 carry = out[i] >> 28
297 out[i] &= bottom28Bits
298 }
299
300 p256ReduceCarry(out, carry)
301 }
302
303 // p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with
304 // the same 29,28,... bit positions as an field element.
305 //
306 // The values in field elements are in Montgomery form: x*R mod p where R =
307 // 2**257. Since we just multiplied two Montgomery values together, the result
308 // is x*y*R*R mod p. We wish to divide by R in order for the result also to be
309 // in Montgomery form.
310 //
311 // On entry: tmp[i] < 2**64
312 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
313 func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
314 // The following table may be helpful when reading this code:
315 //
316 // Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
317 // Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
318 // Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
319 // (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
320 var tmp2 [18]uint32
321 var carry, x, xMask uint32
322
323 // tmp contains 64-bit words with the same 29,28,29-bit positions as an
324 // field element. So the top of an element of tmp might overlap with
325 // another element two positions down. The following loop eliminates
326 // this overlap.
327 tmp2[0] = uint32(tmp[0]) & bottom29Bits
328
329 tmp2[1] = uint32(tmp[0]) >> 29
330 tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
331 tmp2[1] += uint32(tmp[1]) & bottom28Bits
332 carry = tmp2[1] >> 28
333 tmp2[1] &= bottom28Bits
334
335 for i := 2; i < 17; i++ {
336 tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
337 tmp2[i] += (uint32(tmp[i-1])) >> 28
338 tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
339 tmp2[i] += uint32(tmp[i]) & bottom29Bits
340 tmp2[i] += carry
341 carry = tmp2[i] >> 29
342 tmp2[i] &= bottom29Bits
343
344 i++
345 if i == 17 {
346 break
347 }
348 tmp2[i] = uint32(tmp[i-2]>>32) >> 25
349 tmp2[i] += uint32(tmp[i-1]) >> 29
350 tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
351 tmp2[i] += uint32(tmp[i]) & bottom28Bits
352 tmp2[i] += carry
353 carry = tmp2[i] >> 28
354 tmp2[i] &= bottom28Bits
355 }
356
357 tmp2[17] = uint32(tmp[15]>>32) >> 25
358 tmp2[17] += uint32(tmp[16]) >> 29
359 tmp2[17] += uint32(tmp[16]>>32) << 3
360 tmp2[17] += carry
361
362 // Montgomery elimination of terms:
363 //
364 // Since R is 2**257, we can divide by R with a bitwise shift if we can
365 // ensure that the right-most 257 bits are all zero. We can make that tr ue
366 // by adding multiplies of p without affecting the value.
367 //
368 // So we eliminate limbs from right to left. Since the bottom 29 bits of p
369 // are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
370 // We can do that for 8 further limbs and then right shift to eliminate the
371 // extra factor of R.
372 for i := 0; ; i += 2 {
373 tmp2[i+1] += tmp2[i] >> 29
374 x = tmp2[i] & bottom29Bits
375 xMask = nonZeroToAllOnes(x)
376 tmp2[i] = 0
377
378 // The bounds calculations for this loop are tricky. Each iterat ion of
379 // the loop eliminates two words by adding values to words to th eir
380 // right.
381 //
382 // The following table contains the amounts added to each word ( as an
383 // offset from the value of i at the top of the loop). The amoun ts are
384 // accounted for from the first and second half of the loop sepa rately
385 // and are written as, for example, 28 to mean a value <2**28.
386 //
387 // Word: 3 4 5 6 7 8 9 10
388 // Added in top half: 28 11 29 21 29 28
389 // 28 29
390 // 29
391 // Added in bottom half: 29 10 28 21 28 28
392 // 29
393 //
394 // The value that is currently offset 7 will be offset 5 for the next
395 // iteration and then offset 3 for the iteration after that. The refore
396 // the total value added will be the values added at 7, 5 and 3.
397 //
398 // The following table accumulates these values. The sums at the bottom
399 // are written as, for example, 29+28, to mean a value < 2**29+2 **28.
400 //
401 // Word: 3 4 5 6 7 8 9 10 11 12 13
402 // 28 11 10 29 21 29 28 28 28 28 28
403 // 29 28 11 28 29 28 29 28 29 28
404 // 29 28 21 21 29 21 29 21
405 // 10 29 28 21 28 21 28
406 // 28 29 28 29 28 29 28
407 // 11 10 29 10 29 10
408 // 29 28 11 28 11
409 // 29 29
410 // -------------------------------------- ------
411 // 30+ 31+ 30+ 31 + 30+
412 // 28+ 29+ 28+ 29 + 21+
413 // 21+ 28+ 21+ 28 + 10
414 // 10 21+ 10 21 +
415 // 11 11
416 //
417 // So the greatest amount is added to tmp2[10] and tmp2[12]. If
418 // tmp2[10/12] has an initial value of <2**29, then the maximum value
419 // will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2 **32,
420 // as required.
421 tmp2[i+3] += (x << 10) & bottom28Bits
422 tmp2[i+4] += (x >> 18)
423
424 tmp2[i+6] += (x << 21) & bottom29Bits
425 tmp2[i+7] += x >> 8
426
427 // At position 200, which is the starting bit position for word 7, we
428 // have a factor of 0xf000000 = 2**28 - 2**24.
429 tmp2[i+7] += 0x10000000 & xMask
430 tmp2[i+8] += (x - 1) & xMask
431 tmp2[i+7] -= (x << 24) & bottom28Bits
432 tmp2[i+8] -= x >> 4
433
434 tmp2[i+8] += 0x20000000 & xMask
435 tmp2[i+8] -= x
436 tmp2[i+8] += (x << 28) & bottom29Bits
437 tmp2[i+9] += ((x >> 1) - 1) & xMask
438
439 if i+1 == p256Limbs {
440 break
441 }
442 tmp2[i+2] += tmp2[i+1] >> 28
443 x = tmp2[i+1] & bottom28Bits
444 xMask = nonZeroToAllOnes(x)
445 tmp2[i+1] = 0
446
447 tmp2[i+4] += (x << 11) & bottom29Bits
448 tmp2[i+5] += (x >> 18)
449
450 tmp2[i+7] += (x << 21) & bottom28Bits
451 tmp2[i+8] += x >> 7
452
453 // At position 199, which is the starting bit of the 8th word wh en
454 // dealing with a context starting on an odd word, we have a fac tor of
455 // 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
456 // word from i+1 is i+8.
457 tmp2[i+8] += 0x20000000 & xMask
458 tmp2[i+9] += (x - 1) & xMask
459 tmp2[i+8] -= (x << 25) & bottom29Bits
460 tmp2[i+9] -= x >> 4
461
462 tmp2[i+9] += 0x10000000 & xMask
463 tmp2[i+9] -= x
464 tmp2[i+10] += (x - 1) & xMask
465 }
466
467 // We merge the right shift with a carry chain. The words above 2**257 h ave
468 // widths of 28,29,... which we need to correct when copying them down.
469 carry = 0
470 for i := 0; i < 8; i++ {
471 // The maximum value of tmp2[i + 9] occurs on the first iteratio n and
472 // is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
473 // therefore safe.
474 out[i] = tmp2[i+9]
475 out[i] += carry
476 out[i] += (tmp2[i+10] << 28) & bottom29Bits
477 carry = out[i] >> 29
478 out[i] &= bottom29Bits
479
480 i++
481 out[i] = tmp2[i+9] >> 1
482 out[i] += carry
483 carry = out[i] >> 28
484 out[i] &= bottom28Bits
485 }
486
487 out[8] = tmp2[17]
488 out[8] += carry
489 carry = out[8] >> 29
490 out[8] &= bottom29Bits
491
492 p256ReduceCarry(out, carry)
493 }
494
495 // p256Square sets out=in*in.
496 //
497 // On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
498 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
499 func p256Square(out, in *[p256Limbs]uint32) {
500 var tmp [17]uint64
501
502 tmp[0] = uint64(in[0]) * uint64(in[0])
503 tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
504 tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
505 uint64(in[1])*(uint64(in[1])<<1)
506 tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
507 uint64(in[1])*(uint64(in[2])<<1)
508 tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
509 uint64(in[1])*(uint64(in[3])<<2) +
510 uint64(in[2])*uint64(in[2])
511 tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
512 uint64(in[1])*(uint64(in[4])<<1) +
513 uint64(in[2])*(uint64(in[3])<<1)
514 tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
515 uint64(in[1])*(uint64(in[5])<<2) +
516 uint64(in[2])*(uint64(in[4])<<1) +
517 uint64(in[3])*(uint64(in[3])<<1)
518 tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
519 uint64(in[1])*(uint64(in[6])<<1) +
520 uint64(in[2])*(uint64(in[5])<<1) +
521 uint64(in[3])*(uint64(in[4])<<1)
522 // tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**6 0,
523 // which is < 2**64 as required.
524 tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
525 uint64(in[1])*(uint64(in[7])<<2) +
526 uint64(in[2])*(uint64(in[6])<<1) +
527 uint64(in[3])*(uint64(in[5])<<2) +
528 uint64(in[4])*uint64(in[4])
529 tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
530 uint64(in[2])*(uint64(in[7])<<1) +
531 uint64(in[3])*(uint64(in[6])<<1) +
532 uint64(in[4])*(uint64(in[5])<<1)
533 tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
534 uint64(in[3])*(uint64(in[7])<<2) +
535 uint64(in[4])*(uint64(in[6])<<1) +
536 uint64(in[5])*(uint64(in[5])<<1)
537 tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
538 uint64(in[4])*(uint64(in[7])<<1) +
539 uint64(in[5])*(uint64(in[6])<<1)
540 tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
541 uint64(in[5])*(uint64(in[7])<<2) +
542 uint64(in[6])*uint64(in[6])
543 tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
544 uint64(in[6])*(uint64(in[7])<<1)
545 tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
546 uint64(in[7])*(uint64(in[7])<<1)
547 tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
548 tmp[16] = uint64(in[8]) * uint64(in[8])
549
550 p256ReduceDegree(out, tmp)
551 }
552
553 // p256Mul sets out=in*in2.
554 //
555 // On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
556 // in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
557 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
558 func p256Mul(out, in, in2 *[p256Limbs]uint32) {
559 var tmp [17]uint64
560
561 tmp[0] = uint64(in[0]) * uint64(in2[0])
562 tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) +
563 uint64(in[1])*(uint64(in2[0])<<0)
564 tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) +
565 uint64(in[1])*(uint64(in2[1])<<1) +
566 uint64(in[2])*(uint64(in2[0])<<0)
567 tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) +
568 uint64(in[1])*(uint64(in2[2])<<0) +
569 uint64(in[2])*(uint64(in2[1])<<0) +
570 uint64(in[3])*(uint64(in2[0])<<0)
571 tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) +
572 uint64(in[1])*(uint64(in2[3])<<1) +
573 uint64(in[2])*(uint64(in2[2])<<0) +
574 uint64(in[3])*(uint64(in2[1])<<1) +
575 uint64(in[4])*(uint64(in2[0])<<0)
576 tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) +
577 uint64(in[1])*(uint64(in2[4])<<0) +
578 uint64(in[2])*(uint64(in2[3])<<0) +
579 uint64(in[3])*(uint64(in2[2])<<0) +
580 uint64(in[4])*(uint64(in2[1])<<0) +
581 uint64(in[5])*(uint64(in2[0])<<0)
582 tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) +
583 uint64(in[1])*(uint64(in2[5])<<1) +
584 uint64(in[2])*(uint64(in2[4])<<0) +
585 uint64(in[3])*(uint64(in2[3])<<1) +
586 uint64(in[4])*(uint64(in2[2])<<0) +
587 uint64(in[5])*(uint64(in2[1])<<1) +
588 uint64(in[6])*(uint64(in2[0])<<0)
589 tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) +
590 uint64(in[1])*(uint64(in2[6])<<0) +
591 uint64(in[2])*(uint64(in2[5])<<0) +
592 uint64(in[3])*(uint64(in2[4])<<0) +
593 uint64(in[4])*(uint64(in2[3])<<0) +
594 uint64(in[5])*(uint64(in2[2])<<0) +
595 uint64(in[6])*(uint64(in2[1])<<0) +
596 uint64(in[7])*(uint64(in2[0])<<0)
597 // tmp[8] has the greatest value but doesn't overflow. See logic in
598 // p256Square.
599 tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) +
600 uint64(in[1])*(uint64(in2[7])<<1) +
601 uint64(in[2])*(uint64(in2[6])<<0) +
602 uint64(in[3])*(uint64(in2[5])<<1) +
603 uint64(in[4])*(uint64(in2[4])<<0) +
604 uint64(in[5])*(uint64(in2[3])<<1) +
605 uint64(in[6])*(uint64(in2[2])<<0) +
606 uint64(in[7])*(uint64(in2[1])<<1) +
607 uint64(in[8])*(uint64(in2[0])<<0)
608 tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) +
609 uint64(in[2])*(uint64(in2[7])<<0) +
610 uint64(in[3])*(uint64(in2[6])<<0) +
611 uint64(in[4])*(uint64(in2[5])<<0) +
612 uint64(in[5])*(uint64(in2[4])<<0) +
613 uint64(in[6])*(uint64(in2[3])<<0) +
614 uint64(in[7])*(uint64(in2[2])<<0) +
615 uint64(in[8])*(uint64(in2[1])<<0)
616 tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) +
617 uint64(in[3])*(uint64(in2[7])<<1) +
618 uint64(in[4])*(uint64(in2[6])<<0) +
619 uint64(in[5])*(uint64(in2[5])<<1) +
620 uint64(in[6])*(uint64(in2[4])<<0) +
621 uint64(in[7])*(uint64(in2[3])<<1) +
622 uint64(in[8])*(uint64(in2[2])<<0)
623 tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) +
624 uint64(in[4])*(uint64(in2[7])<<0) +
625 uint64(in[5])*(uint64(in2[6])<<0) +
626 uint64(in[6])*(uint64(in2[5])<<0) +
627 uint64(in[7])*(uint64(in2[4])<<0) +
628 uint64(in[8])*(uint64(in2[3])<<0)
629 tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) +
630 uint64(in[5])*(uint64(in2[7])<<1) +
631 uint64(in[6])*(uint64(in2[6])<<0) +
632 uint64(in[7])*(uint64(in2[5])<<1) +
633 uint64(in[8])*(uint64(in2[4])<<0)
634 tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) +
635 uint64(in[6])*(uint64(in2[7])<<0) +
636 uint64(in[7])*(uint64(in2[6])<<0) +
637 uint64(in[8])*(uint64(in2[5])<<0)
638 tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) +
639 uint64(in[7])*(uint64(in2[7])<<1) +
640 uint64(in[8])*(uint64(in2[6])<<0)
641 tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) +
642 uint64(in[8])*(uint64(in2[7])<<0)
643 tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0)
644
645 p256ReduceDegree(out, tmp)
646 }
647
648 func p256Assign(out, in *[p256Limbs]uint32) {
649 *out = *in
650 }
651
652 // p256Invert calculates |out| = |in|^{-1}
653 //
654 // Based on Fermat's Little Theorem:
655 // a^p = a (mod p)
656 // a^{p-1} = 1 (mod p)
657 // a^{p-2} = a^{-1} (mod p)
658 func p256Invert(out, in *[p256Limbs]uint32) {
659 var ftmp, ftmp2 [p256Limbs]uint32
660
661 // each e_I will hold |in|^{2^I - 1}
662 var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32
663
664 p256Square(&ftmp, in) // 2^1
665 p256Mul(&ftmp, in, &ftmp) // 2^2 - 2^0
666 p256Assign(&e2, &ftmp)
667 p256Square(&ftmp, &ftmp) // 2^3 - 2^1
668 p256Square(&ftmp, &ftmp) // 2^4 - 2^2
669 p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0
670 p256Assign(&e4, &ftmp)
671 p256Square(&ftmp, &ftmp) // 2^5 - 2^1
672 p256Square(&ftmp, &ftmp) // 2^6 - 2^2
673 p256Square(&ftmp, &ftmp) // 2^7 - 2^3
674 p256Square(&ftmp, &ftmp) // 2^8 - 2^4
675 p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
676 p256Assign(&e8, &ftmp)
677 for i := 0; i < 8; i++ {
678 p256Square(&ftmp, &ftmp)
679 } // 2^16 - 2^8
680 p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0
681 p256Assign(&e16, &ftmp)
682 for i := 0; i < 16; i++ {
683 p256Square(&ftmp, &ftmp)
684 } // 2^32 - 2^16
685 p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
686 p256Assign(&e32, &ftmp)
687 for i := 0; i < 32; i++ {
688 p256Square(&ftmp, &ftmp)
689 } // 2^64 - 2^32
690 p256Assign(&e64, &ftmp)
691 p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0
692 for i := 0; i < 192; i++ {
693 p256Square(&ftmp, &ftmp)
694 } // 2^256 - 2^224 + 2^192
695
696 p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0
697 for i := 0; i < 16; i++ {
698 p256Square(&ftmp2, &ftmp2)
699 } // 2^80 - 2^16
700 p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0
701 for i := 0; i < 8; i++ {
702 p256Square(&ftmp2, &ftmp2)
703 } // 2^88 - 2^8
704 p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0
705 for i := 0; i < 4; i++ {
706 p256Square(&ftmp2, &ftmp2)
707 } // 2^92 - 2^4
708 p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0
709 p256Square(&ftmp2, &ftmp2) // 2^93 - 2^1
710 p256Square(&ftmp2, &ftmp2) // 2^94 - 2^2
711 p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0
712 p256Square(&ftmp2, &ftmp2) // 2^95 - 2^1
713 p256Square(&ftmp2, &ftmp2) // 2^96 - 2^2
714 p256Mul(&ftmp2, &ftmp2, in) // 2^96 - 3
715
716 p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3
717 }
718
719 // p256Scalar3 sets out=3*out.
720 //
721 // On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
722 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
723 func p256Scalar3(out *[p256Limbs]uint32) {
724 var carry uint32
725
726 for i := 0; ; i++ {
727 out[i] *= 3
728 out[i] += carry
729 carry = out[i] >> 29
730 out[i] &= bottom29Bits
731
732 i++
733 if i == p256Limbs {
734 break
735 }
736
737 out[i] *= 3
738 out[i] += carry
739 carry = out[i] >> 28
740 out[i] &= bottom28Bits
741 }
742
743 p256ReduceCarry(out, carry)
744 }
745
746 // p256Scalar4 sets out=4*out.
747 //
748 // On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
749 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
750 func p256Scalar4(out *[p256Limbs]uint32) {
751 var carry, nextCarry uint32
752
753 for i := 0; ; i++ {
754 nextCarry = out[i] >> 27
755 out[i] <<= 2
756 out[i] &= bottom29Bits
757 out[i] += carry
758 carry = nextCarry + (out[i] >> 29)
759 out[i] &= bottom29Bits
760
761 i++
762 if i == p256Limbs {
763 break
764 }
765 nextCarry = out[i] >> 26
766 out[i] <<= 2
767 out[i] &= bottom28Bits
768 out[i] += carry
769 carry = nextCarry + (out[i] >> 28)
770 out[i] &= bottom28Bits
771 }
772
773 p256ReduceCarry(out, carry)
774 }
775
776 // p256Scalar8 sets out=8*out.
777 //
778 // On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
779 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
780 func p256Scalar8(out *[p256Limbs]uint32) {
781 var carry, nextCarry uint32
782
783 for i := 0; ; i++ {
784 nextCarry = out[i] >> 26
785 out[i] <<= 3
786 out[i] &= bottom29Bits
787 out[i] += carry
788 carry = nextCarry + (out[i] >> 29)
789 out[i] &= bottom29Bits
790
791 i++
792 if i == p256Limbs {
793 break
794 }
795 nextCarry = out[i] >> 25
796 out[i] <<= 3
797 out[i] &= bottom28Bits
798 out[i] += carry
799 carry = nextCarry + (out[i] >> 28)
800 out[i] &= bottom28Bits
801 }
802
803 p256ReduceCarry(out, carry)
804 }
805
806 // Group operations:
807 //
808 // Elements of the elliptic curve group are represented in Jacobian
809 // coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
810 // Jacobian form.
811
812 // p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}.
813 //
814 // See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling -dbl-2009-l
815 func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) {
816 var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32
817
818 p256Square(&delta, z)
819 p256Square(&gamma, y)
820 p256Mul(&beta, x, &gamma)
821
822 p256Sum(&tmp, x, &delta)
823 p256Diff(&tmp2, x, &delta)
824 p256Mul(&alpha, &tmp, &tmp2)
825 p256Scalar3(&alpha)
826
827 p256Sum(&tmp, y, z)
828 p256Square(&tmp, &tmp)
829 p256Diff(&tmp, &tmp, &gamma)
830 p256Diff(zOut, &tmp, &delta)
831
832 p256Scalar4(&beta)
833 p256Square(xOut, &alpha)
834 p256Diff(xOut, xOut, &beta)
835 p256Diff(xOut, xOut, &beta)
836
837 p256Diff(&tmp, &beta, xOut)
838 p256Mul(&tmp, &alpha, &tmp)
839 p256Square(&tmp2, &gamma)
840 p256Scalar8(&tmp2)
841 p256Diff(yOut, &tmp, &tmp2)
842 }
843
844 // p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}.
845 // (i.e. the second point is affine.)
846 //
847 // See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
848 //
849 // Note that this function does not handle P+P, infinity+P nor P+infinity
850 // correctly.
851 func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) {
852 var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32
853
854 p256Square(&z1z1, z1)
855 p256Sum(&tmp, z1, z1)
856
857 p256Mul(&u2, x2, &z1z1)
858 p256Mul(&z1z1z1, z1, &z1z1)
859 p256Mul(&s2, y2, &z1z1z1)
860 p256Diff(&h, &u2, x1)
861 p256Sum(&i, &h, &h)
862 p256Square(&i, &i)
863 p256Mul(&j, &h, &i)
864 p256Diff(&r, &s2, y1)
865 p256Sum(&r, &r, &r)
866 p256Mul(&v, x1, &i)
867
868 p256Mul(zOut, &tmp, &h)
869 p256Square(&rr, &r)
870 p256Diff(xOut, &rr, &j)
871 p256Diff(xOut, xOut, &v)
872 p256Diff(xOut, xOut, &v)
873
874 p256Diff(&tmp, &v, xOut)
875 p256Mul(yOut, &tmp, &r)
876 p256Mul(&tmp, y1, &j)
877 p256Diff(yOut, yOut, &tmp)
878 p256Diff(yOut, yOut, &tmp)
879 }
880
881 // p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}.
882 //
883 // See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
884 //
885 // Note that this function does not handle P+P, infinity+P nor P+infinity
886 // correctly.
887 func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) {
888 var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [ p256Limbs]uint32
889
890 p256Square(&z1z1, z1)
891 p256Square(&z2z2, z2)
892 p256Mul(&u1, x1, &z2z2)
893
894 p256Sum(&tmp, z1, z2)
895 p256Square(&tmp, &tmp)
896 p256Diff(&tmp, &tmp, &z1z1)
897 p256Diff(&tmp, &tmp, &z2z2)
898
899 p256Mul(&z2z2z2, z2, &z2z2)
900 p256Mul(&s1, y1, &z2z2z2)
901
902 p256Mul(&u2, x2, &z1z1)
903 p256Mul(&z1z1z1, z1, &z1z1)
904 p256Mul(&s2, y2, &z1z1z1)
905 p256Diff(&h, &u2, &u1)
906 p256Sum(&i, &h, &h)
907 p256Square(&i, &i)
908 p256Mul(&j, &h, &i)
909 p256Diff(&r, &s2, &s1)
910 p256Sum(&r, &r, &r)
911 p256Mul(&v, &u1, &i)
912
913 p256Mul(zOut, &tmp, &h)
914 p256Square(&rr, &r)
915 p256Diff(xOut, &rr, &j)
916 p256Diff(xOut, xOut, &v)
917 p256Diff(xOut, xOut, &v)
918
919 p256Diff(&tmp, &v, xOut)
920 p256Mul(yOut, &tmp, &r)
921 p256Mul(&tmp, &s1, &j)
922 p256Diff(yOut, yOut, &tmp)
923 p256Diff(yOut, yOut, &tmp)
924 }
925
926 // p256CopyConditional sets out=in if mask = 0xffffffff in constant time.
927 //
928 // On entry: mask is either 0 or 0xffffffff.
929 func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) {
930 for i := 0; i < p256Limbs; i++ {
931 tmp := mask & (in[i] ^ out[i])
932 out[i] ^= tmp
933 }
934 }
935
936 // p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
937 // On entry: index < 16, table[0] must be zero.
938 func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) {
939 for i := range xOut {
940 xOut[i] = 0
941 }
942 for i := range yOut {
943 yOut[i] = 0
944 }
945
946 for i := uint32(1); i < 16; i++ {
947 mask := i ^ index
948 mask |= mask >> 2
949 mask |= mask >> 1
950 mask &= 1
951 mask--
952 for j := range xOut {
953 xOut[j] |= table[0] & mask
954 table = table[1:]
955 }
956 for j := range yOut {
957 yOut[j] |= table[0] & mask
958 table = table[1:]
959 }
960 }
961 }
962
963 // p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
964 // table.
965 // On entry: index < 16, table[0] must be zero.
966 func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3] [p256Limbs]uint32, index uint32) {
967 for i := range xOut {
968 xOut[i] = 0
969 }
970 for i := range yOut {
971 yOut[i] = 0
972 }
973 for i := range zOut {
974 zOut[i] = 0
975 }
976
977 // The implicit value at index 0 is all zero. We don't need to perform t hat
978 // iteration of the loop because we already set out_* to zero.
979 for i := uint32(1); i < 16; i++ {
980 mask := i ^ index
981 mask |= mask >> 2
982 mask |= mask >> 1
983 mask &= 1
984 mask--
985 for j := range xOut {
986 xOut[j] |= table[i][0][j] & mask
987 }
988 for j := range yOut {
989 yOut[j] |= table[i][1][j] & mask
990 }
991 for j := range zOut {
992 zOut[j] |= table[i][2][j] & mask
993 }
994 }
995 }
996
997 // p256GetBit returns the bit'th bit of scalar.
998 func p256GetBit(scalar *[32]uint8, bit uint) uint32 {
999 return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
1000 }
1001
1002 // p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
1003 // little-endian number. Note that the value of scalar must be less than the
1004 // order of the group.
1005 func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) {
1006 nIsInfinityMask := ^uint32(0)
1007 var pIsNoninfiniteMask, mask, tableOffset uint32
1008 var px, py, tx, ty, tz [p256Limbs]uint32
1009
1010 for i := range xOut {
1011 xOut[i] = 0
1012 }
1013 for i := range yOut {
1014 yOut[i] = 0
1015 }
1016 for i := range zOut {
1017 zOut[i] = 0
1018 }
1019
1020 // The loop adds bits at positions 0, 64, 128 and 192, followed by
1021 // positions 32,96,160 and 224 and does this 32 times.
1022 for i := uint(0); i < 32; i++ {
1023 if i != 0 {
1024 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1025 }
1026 tableOffset = 0
1027 for j := uint(0); j <= 32; j += 32 {
1028 bit0 := p256GetBit(scalar, 31-i+j)
1029 bit1 := p256GetBit(scalar, 95-i+j)
1030 bit2 := p256GetBit(scalar, 159-i+j)
1031 bit3 := p256GetBit(scalar, 223-i+j)
1032 index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)
1033
1034 p256SelectAffinePoint(&px, &py, p256Precomputed[tableOff set:], index)
1035 tableOffset += 30 * p256Limbs
1036
1037 // Since scalar is less than the order of the group, we know that
1038 // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
1039 // below.
1040 p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
1041 // The result of pointAddMixed is incorrect if {xOut,yOu t,zOut} is zero
1042 // (a.k.a. the point at infinity). We handle that situa tion by
1043 // copying the point from the table.
1044 p256CopyConditional(xOut, &px, nIsInfinityMask)
1045 p256CopyConditional(yOut, &py, nIsInfinityMask)
1046 p256CopyConditional(zOut, &p256One, nIsInfinityMask)
1047
1048 // Equally, the result is also wrong if the point from t he table is
1049 // zero, which happens when the index is zero. We handle that by
1050 // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if i ndex != 0.
1051 pIsNoninfiniteMask = nonZeroToAllOnes(index)
1052 mask = pIsNoninfiniteMask & ^nIsInfinityMask
1053 p256CopyConditional(xOut, &tx, mask)
1054 p256CopyConditional(yOut, &ty, mask)
1055 p256CopyConditional(zOut, &tz, mask)
1056 // If p was not zero, then n is now non-zero.
1057 nIsInfinityMask &= ^pIsNoninfiniteMask
1058 }
1059 }
1060 }
1061
1062 // p256PointToAffine converts a Jacobian point to an affine point. If the input
1063 // is the point at infinity then it returns (0, 0) in constant time.
1064 func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) {
1065 var zInv, zInvSq [p256Limbs]uint32
1066
1067 p256Invert(&zInv, z)
1068 p256Square(&zInvSq, &zInv)
1069 p256Mul(xOut, x, &zInvSq)
1070 p256Mul(&zInv, &zInv, &zInvSq)
1071 p256Mul(yOut, y, &zInv)
1072 }
1073
1074 // p256ToAffine returns a pair of *big.Int containing the affine representation
1075 // of {x,y,z}.
1076 func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) {
1077 var xx, yy [p256Limbs]uint32
1078 p256PointToAffine(&xx, &yy, x, y, z)
1079 return p256ToBig(&xx), p256ToBig(&yy)
1080 }
1081
1082 // p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}.
1083 func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8 ) {
1084 var px, py, pz, tx, ty, tz [p256Limbs]uint32
1085 var precomp [16][3][p256Limbs]uint32
1086 var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
1087
1088 // We precompute 0,1,2,... times {x,y}.
1089 precomp[1][0] = *x
1090 precomp[1][1] = *y
1091 precomp[1][2] = p256One
1092
1093 for i := 2; i < 16; i += 2 {
1094 p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
1095 p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i +1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
1096 }
1097
1098 for i := range xOut {
1099 xOut[i] = 0
1100 }
1101 for i := range yOut {
1102 yOut[i] = 0
1103 }
1104 for i := range zOut {
1105 zOut[i] = 0
1106 }
1107 nIsInfinityMask = ^uint32(0)
1108
1109 // We add in a window of four bits each iteration and do this 64 times.
1110 for i := 0; i < 64; i++ {
1111 if i != 0 {
1112 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1113 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1114 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1115 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1116 }
1117
1118 index = uint32(scalar[31-i/2])
1119 if (i & 1) == 1 {
1120 index &= 15
1121 } else {
1122 index >>= 4
1123 }
1124
1125 // See the comments in scalarBaseMult about handling infinities.
1126 p256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
1127 p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz)
1128 p256CopyConditional(xOut, &px, nIsInfinityMask)
1129 p256CopyConditional(yOut, &py, nIsInfinityMask)
1130 p256CopyConditional(zOut, &pz, nIsInfinityMask)
1131
1132 pIsNoninfiniteMask = nonZeroToAllOnes(index)
1133 mask = pIsNoninfiniteMask & ^nIsInfinityMask
1134 p256CopyConditional(xOut, &tx, mask)
1135 p256CopyConditional(yOut, &ty, mask)
1136 p256CopyConditional(zOut, &tz, mask)
1137 nIsInfinityMask &= ^pIsNoninfiniteMask
1138 }
1139 }
1140
1141 // p256FromBig sets out = R*in.
1142 func p256FromBig(out *[p256Limbs]uint32, in *big.Int) {
1143 tmp := new(big.Int).Lsh(in, 257)
1144 tmp.Mod(tmp, p256.P)
1145
1146 for i := 0; i < p256Limbs; i++ {
1147 if bits := tmp.Bits(); len(bits) > 0 {
1148 out[i] = uint32(bits[0]) & bottom29Bits
1149 } else {
1150 out[i] = 0
1151 }
1152 tmp.Rsh(tmp, 29)
1153
1154 i++
1155 if i == p256Limbs {
1156 break
1157 }
1158
1159 if bits := tmp.Bits(); len(bits) > 0 {
1160 out[i] = uint32(bits[0]) & bottom28Bits
1161 } else {
1162 out[i] = 0
1163 }
1164 tmp.Rsh(tmp, 28)
1165 }
1166 }
1167
1168 // p256ToBig returns a *big.Int containing the value of in.
1169 func p256ToBig(in *[p256Limbs]uint32) *big.Int {
1170 result, tmp := new(big.Int), new(big.Int)
1171
1172 result.SetInt64(int64(in[p256Limbs-1]))
1173 for i := p256Limbs - 2; i >= 0; i-- {
1174 if (i & 1) == 0 {
1175 result.Lsh(result, 29)
1176 } else {
1177 result.Lsh(result, 28)
1178 }
1179 tmp.SetInt64(int64(in[i]))
1180 result.Add(result, tmp)
1181 }
1182
1183 result.Mul(result, p256RInverse)
1184 result.Mod(result, p256.P)
1185 return result
1186 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b