Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(429)

Delta Between Two Patch Sets: src/pkg/math/big/nat.go

Issue 6716048: code review 6716048: math/big: add 4-bit, fixed window exponentiation. (Closed)
Left Patch Set: diff -r e64d4ee19fff https://go.googlecode.com/hg/ Created 11 years, 5 months ago
Right Patch Set: diff -r 32c1ce44286f https://go.googlecode.com/hg/ Created 11 years, 5 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Package big implements multi-precision arithmetic (big numbers). 5 // Package big implements multi-precision arithmetic (big numbers).
6 // The following numeric types are supported: 6 // The following numeric types are supported:
7 // 7 //
8 // - Int signed integers 8 // - Int signed integers
9 // - Rat rational numbers 9 // - Rat rational numbers
10 // 10 //
(...skipping 1209 matching lines...) Expand 10 before | Expand all | Expand 10 after
1220 } 1220 }
1221 z[len(limit)-1] &= mask 1221 z[len(limit)-1] &= mask
1222 if z.cmp(limit) < 0 { 1222 if z.cmp(limit) < 0 {
1223 break 1223 break
1224 } 1224 }
1225 } 1225 }
1226 1226
1227 return z.norm() 1227 return z.norm()
1228 } 1228 }
1229 1229
1230 // If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It 1230 // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
1231 // reuses the storage of z if possible. 1231 // otherwise it sets z to x**y. The result is the value of z.
1232 func (z nat) expNN(x, y, m nat) nat { 1232 func (z nat) expNN(x, y, m nat) nat {
1233 if alias(z, x) || alias(z, y) { 1233 if alias(z, x) || alias(z, y) {
1234 // We cannot allow in-place modification of x or y. 1234 // We cannot allow in-place modification of x or y.
1235 z = nil 1235 z = nil
1236 } 1236 }
1237 1237
1238 if len(y) == 0 { 1238 if len(y) == 0 {
1239 z = z.make(1) 1239 z = z.make(1)
1240 z[0] = 1 1240 z[0] = 1
1241 return z 1241 return z
1242 } 1242 }
1243 1243 » // y > 0
1244 » if m != nil { 1244
1245 » if len(m) != 0 {
1245 // We likely end up being as long as the modulus. 1246 // We likely end up being as long as the modulus.
1246 z = z.make(len(m)) 1247 z = z.make(len(m))
1247 } 1248 }
1248 z = z.set(x) 1249 z = z.set(x)
1249 1250
1250 » // If the exponent is large and the base is non-trivial, we use a 1251 » // If the base is non-trivial and the exponent is large, we use
1251 // 4-bit, windowed exponentiation. This involves precomputing 14 values 1252 // 4-bit, windowed exponentiation. This involves precomputing 14 values
1252 // (x^2...x^15) but then reduces the number of multiply-reduces by a 1253 // (x^2...x^15) but then reduces the number of multiply-reduces by a
1253 // third. Even for a 32-bit exponent, this reduces the number of 1254 // third. Even for a 32-bit exponent, this reduces the number of
1254 // operations. 1255 // operations.
1255 » if len(y) > 1 && len(x) > 1 && m != nil { 1256 » if len(x) > 1 && len(y) > 1 && len(m) > 0 {
1256 return z.expNNWindowed(x, y, m) 1257 return z.expNNWindowed(x, y, m)
1257 } 1258 }
1258 1259
1259 » v := y[len(y)-1] 1260 » v := y[len(y)-1] // v > 0 because y is normalized and y > 0
1260 » // It's invalid for the most significant word to be zero, therefore we
1261 » // will find a one bit.
1262 shift := leadingZeros(v) + 1 1261 shift := leadingZeros(v) + 1
1263 v <<= shift 1262 v <<= shift
1264 var q nat 1263 var q nat
1265 1264
1266 const mask = 1 << (_W - 1) 1265 const mask = 1 << (_W - 1)
1267 1266
1268 // We walk through the bits of the exponent one by one. Each time we 1267 // We walk through the bits of the exponent one by one. Each time we
1269 // see a bit, we square, thus doubling the power. If the bit is a one, 1268 // see a bit, we square, thus doubling the power. If the bit is a one,
1270 // we also multiply by x, thus adding one to the power. 1269 // we also multiply by x, thus adding one to the power.
1271 1270
1272 w := _W - int(shift) 1271 w := _W - int(shift)
1273 // zz and r are used to avoid allocating in mul and div as 1272 // zz and r are used to avoid allocating in mul and div as
1274 // otherwise the arguments would alias. 1273 // otherwise the arguments would alias.
1275 var zz, r nat 1274 var zz, r nat
1276 for j := 0; j < w; j++ { 1275 for j := 0; j < w; j++ {
1277 zz = zz.mul(z, z) 1276 zz = zz.mul(z, z)
1278 zz, z = z, zz 1277 zz, z = z, zz
1279 1278
1280 if v&mask != 0 { 1279 if v&mask != 0 {
1281 zz = zz.mul(z, x) 1280 zz = zz.mul(z, x)
1282 zz, z = z, zz 1281 zz, z = z, zz
1283 } 1282 }
1284 1283
1285 » » if m != nil { 1284 » » if len(m) != 0 {
1286 zz, r = zz.div(r, z, m) 1285 zz, r = zz.div(r, z, m)
1287 zz, r, q, z = q, z, zz, r 1286 zz, r, q, z = q, z, zz, r
1288 } 1287 }
1289 1288
1290 v <<= 1 1289 v <<= 1
1291 } 1290 }
1292 1291
1293 for i := len(y) - 2; i >= 0; i-- { 1292 for i := len(y) - 2; i >= 0; i-- {
1294 v = y[i] 1293 v = y[i]
1295 1294
1296 for j := 0; j < _W; j++ { 1295 for j := 0; j < _W; j++ {
1297 zz = zz.mul(z, z) 1296 zz = zz.mul(z, z)
1298 zz, z = z, zz 1297 zz, z = z, zz
1299 1298
1300 if v&mask != 0 { 1299 if v&mask != 0 {
1301 zz = zz.mul(z, x) 1300 zz = zz.mul(z, x)
1302 zz, z = z, zz 1301 zz, z = z, zz
1303 } 1302 }
1304 1303
1305 » » » if m != nil { 1304 » » » if len(m) != 0 {
1306 zz, r = zz.div(r, z, m) 1305 zz, r = zz.div(r, z, m)
1307 zz, r, q, z = q, z, zz, r 1306 zz, r, q, z = q, z, zz, r
1308 } 1307 }
1309 1308
1310 v <<= 1 1309 v <<= 1
1311 } 1310 }
1312 } 1311 }
1313 1312
1314 return z.norm() 1313 return z.norm()
1315 } 1314 }
1316 1315
1317 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. 1316 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
1318 func (z nat) expNNWindowed(x, y, m nat) nat { 1317 func (z nat) expNNWindowed(x, y, m nat) nat {
1319 // zz and r are used to avoid allocating in mul and div as otherwise 1318 // zz and r are used to avoid allocating in mul and div as otherwise
1320 // the arguments would alias. 1319 // the arguments would alias.
1321 var zz, r nat 1320 var zz, r nat
1322 1321
1322 const n = 4
1323 // powers[i] contains x^i. 1323 // powers[i] contains x^i.
1324 » var powers [16]nat 1324 » var powers [1 << n]nat
1325 » powers[0] = powers[0].make(1) 1325 » powers[0] = natOne
1326 » powers[0][0] = 1
1327 powers[1] = x 1326 powers[1] = x
1328 » for i := 2; i < 16; i += 2 { 1327 » for i := 2; i < 1<<n; i += 2 {
1329 » » powers[i] = powers[i].mul(powers[i/2], powers[i/2]) 1328 » » p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
1330 » » zz, r = zz.div(r, powers[i], m) 1329 » » *p = p.mul(*p2, *p2)
1331 » » powers[i], r = r, powers[i] 1330 » » zz, r = zz.div(r, *p, m)
1332 » » powers[i+1] = powers[i+1].mul(powers[i], x) 1331 » » *p, r = r, *p
1333 » » zz, r = zz.div(r, powers[i+1], m) 1332 » » *p1 = p1.mul(*p, x)
1334 » » powers[i+1], r = r, powers[i+1] 1333 » » zz, r = zz.div(r, *p1, m)
1335 » } 1334 » » *p1, r = r, *p1
1336 1335 » }
1337 » z = z.make(1) 1336
1338 » z[0] = 1 1337 » z = z.setWord(1)
1339 »······· 1338
1340 for i := len(y) - 1; i >= 0; i-- { 1339 for i := len(y) - 1; i >= 0; i-- {
1341 » » word := y[i] 1340 » » yi := y[i]
1342 » » for j := 0; j < _W; j += 4 { 1341 » » for j := 0; j < _W; j += n {
1343 if i != len(y)-1 || j != 0 { 1342 if i != len(y)-1 || j != 0 {
1344 » » » » // Manually unrolling this speeds the code up by ~15%. 1343 » » » » // Unrolled loop for significant performance
1344 » » » » // gain. Use go test -bench=".*" in crypto/rsa
1345 » » » » // to check performance before making changes.
1345 zz = zz.mul(z, z) 1346 zz = zz.mul(z, z)
1346 zz, z = z, zz 1347 zz, z = z, zz
1347 zz, r = zz.div(r, z, m) 1348 zz, r = zz.div(r, z, m)
1348 z, r = r, z 1349 z, r = r, z
1349 1350
1350 zz = zz.mul(z, z) 1351 zz = zz.mul(z, z)
1351 zz, z = z, zz 1352 zz, z = z, zz
1352 zz, r = zz.div(r, z, m) 1353 zz, r = zz.div(r, z, m)
1353 z, r = r, z 1354 z, r = r, z
1354 1355
1355 zz = zz.mul(z, z) 1356 zz = zz.mul(z, z)
1356 zz, z = z, zz 1357 zz, z = z, zz
1357 zz, r = zz.div(r, z, m) 1358 zz, r = zz.div(r, z, m)
1358 z, r = r, z 1359 z, r = r, z
1359 1360
1360 zz = zz.mul(z, z) 1361 zz = zz.mul(z, z)
1361 zz, z = z, zz 1362 zz, z = z, zz
1362 zz, r = zz.div(r, z, m) 1363 zz, r = zz.div(r, z, m)
1363 z, r = r, z 1364 z, r = r, z
1364 } 1365 }
1365 1366
1366 » » » zz = zz.mul(z, powers[word>>(_W-4)]) 1367 » » » zz = zz.mul(z, powers[yi>>(_W-n)])
1367 zz, z = z, zz 1368 zz, z = z, zz
1368 zz, r = zz.div(r, z, m) 1369 zz, r = zz.div(r, z, m)
1369 z, r = r, z 1370 z, r = r, z
1370 1371
1371 » » » word <<= 4 1372 » » » yi <<= n
1372 } 1373 }
1373 } 1374 }
1374 1375
1375 return z.norm() 1376 return z.norm()
1376 } 1377 }
1377 1378
1378 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime. 1379 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
1379 // If it returns true, n is prime with probability 1 - 1/4^reps. 1380 // If it returns true, n is prime with probability 1 - 1/4^reps.
1380 // If it returns false, n is not prime. 1381 // If it returns false, n is not prime.
1381 func (n nat) probablyPrime(reps int) bool { 1382 func (n nat) probablyPrime(reps int) bool {
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
1495 s = 0 1496 s = 0
1496 d = 0 1497 d = 0
1497 } 1498 }
1498 } 1499 }
1499 if k < len(z) { 1500 if k < len(z) {
1500 z[k] = d 1501 z[k] = d
1501 } 1502 }
1502 1503
1503 return z.norm() 1504 return z.norm()
1504 } 1505 }
LEFTRIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b