p256_asm.go

Documentation: crypto/elliptic

		 1  // Copyright 2015 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
		 6  // implementation of P256. The optimizations performed here are described in
		 7  // detail in:
		 8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
		 9  //													256-bit primes"
		10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
		11  // https://eprint.iacr.org/2013/816.pdf
		12  
		13  //go:build amd64 || arm64
		14  // +build amd64 arm64
		15  
		16  package elliptic
		17  
		18  import (
		19  	"math/big"
		20  )
		21  
		22  type (
		23  	p256Curve struct {
		24  		*CurveParams
		25  	}
		26  
		27  	p256Point struct {
		28  		xyz [12]uint64
		29  	}
		30  )
		31  
		    // p256 is the package-level P-256 curve value; its parameters are filled in
		    // by initP256.
		32  var p256 p256Curve
		33  
		34  func initP256() {
		35  	// See FIPS 186-3, section D.2.3
		36  	p256.CurveParams = &CurveParams{Name: "P-256"}
		37  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
		38  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
		39  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
		40  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
		41  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
		42  	p256.BitSize = 256
		43  }
		44  
		    // Params returns the curve parameters embedded in curve.
		45  func (curve p256Curve) Params() *CurveParams {
		46  	return curve.CurveParams
		47  }
		48  
		49  // Functions implemented in p256_asm_*64.s
		50  // p256Mul sets res to the Montgomery product of in1 and in2 modulo P256.
		51  //go:noescape
		52  func p256Mul(res, in1, in2 []uint64)
		53  
		54  // p256Sqr sets res to the Montgomery square of in modulo P256, repeated n times (n >= 1).
		55  //go:noescape
		56  func p256Sqr(res, in []uint64, n int)
		57  
		58  // p256FromMont sets res to the Montgomery product of in and 1.
		59  //go:noescape
		60  func p256FromMont(res, in []uint64)
		61  
		62  // p256NegCond replaces val with -val iff cond == 1.
		63  //go:noescape
		64  func p256NegCond(val []uint64, cond int)
		65  
		66  // p256MovCond sets res to b if cond == 0, and to a otherwise.
		67  //go:noescape
		68  func p256MovCond(res, a, b []uint64, cond int)
		69  
		70  // p256BigToLittle is an endianness swap from the bytes of in to the 64-bit words of res.
		71  //go:noescape
		72  func p256BigToLittle(res []uint64, in []byte)
		73  
		    // p256LittleToBig is an endianness swap from the 64-bit words of in to the bytes of res.
		74  //go:noescape
		75  func p256LittleToBig(res []byte, in []uint64)
		76  
		77  // p256Select is a constant time table access: it loads the entry of table chosen by idx into point.
		78  //go:noescape
		79  func p256Select(point, table []uint64, idx int)
		80  
		    // p256SelectBase is a constant time table access like p256Select; in this
		    // file it is used on the rows of p256Precomputed.
		81  //go:noescape
		82  func p256SelectBase(point, table []uint64, idx int)
		83  
		84  // p256OrdMul sets res to the Montgomery product of in1 and in2 modulo Ord(G).
		85  //go:noescape
		86  func p256OrdMul(res, in1, in2 []uint64)
		87  
		88  // p256OrdSqr sets res to the Montgomery square of in modulo Ord(G), repeated n times.
		89  //go:noescape
		90  func p256OrdSqr(res, in []uint64, n int)
		91  
		92  // p256PointAddAffineAsm is a point add with in2 being an affine point.
		93  // If sign == 1 -> in2 = -in2
		94  // If sel == 0 -> res = in1
		95  // If zero == 0 -> res = in2
		96  //go:noescape
		97  func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
		98  
		99  // p256PointAddAsm is a point add. It returns one if the two input points were equal and zero
	 100  // otherwise. (Note that, due to the way that the equations work out, some
	 101  // representations of ∞ are considered equal to everything by this function.)
	 102  //go:noescape
	 103  func p256PointAddAsm(res, in1, in2 []uint64) int
	 104  
	 105  // p256PointDoubleAsm is a point double: res = 2 * in.
	 106  //go:noescape
	 107  func p256PointDoubleAsm(res, in []uint64)
	 108  
	 109  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
	 110  	if k.Sign() < 0 {
	 111  		// This should never happen.
	 112  		k = new(big.Int).Neg(k)
	 113  	}
	 114  
	 115  	if k.Cmp(p256.N) >= 0 {
	 116  		// This should never happen.
	 117  		k = new(big.Int).Mod(k, p256.N)
	 118  	}
	 119  
	 120  	// table will store precomputed powers of x.
	 121  	var table [4 * 9]uint64
	 122  	var (
	 123  		_1			= table[4*0 : 4*1]
	 124  		_11		 = table[4*1 : 4*2]
	 125  		_101		= table[4*2 : 4*3]
	 126  		_111		= table[4*3 : 4*4]
	 127  		_1111	 = table[4*4 : 4*5]
	 128  		_10101	= table[4*5 : 4*6]
	 129  		_101111 = table[4*6 : 4*7]
	 130  		x			 = table[4*7 : 4*8]
	 131  		t			 = table[4*8 : 4*9]
	 132  	)
	 133  
	 134  	fromBig(x[:], k)
	 135  	// This code operates in the Montgomery domain where R = 2^256 mod n
	 136  	// and n is the order of the scalar field. (See initP256 for the
	 137  	// value.) Elements in the Montgomery domain take the form a×R and
	 138  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
	 139  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
	 140  	// i.e. converts x into the Montgomery domain.
	 141  	// Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
	 142  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
	 143  	p256OrdMul(_1, x, RR)			// _1
	 144  	p256OrdSqr(x, _1, 1)			 // _10
	 145  	p256OrdMul(_11, x, _1)		 // _11
	 146  	p256OrdMul(_101, x, _11)	 // _101
	 147  	p256OrdMul(_111, x, _101)	// _111
	 148  	p256OrdSqr(x, _101, 1)		 // _1010
	 149  	p256OrdMul(_1111, _101, x) // _1111
	 150  
	 151  	p256OrdSqr(t, x, 1)					// _10100
	 152  	p256OrdMul(_10101, t, _1)		// _10101
	 153  	p256OrdSqr(x, _10101, 1)		 // _101010
	 154  	p256OrdMul(_101111, _101, x) // _101111
	 155  	p256OrdMul(x, _10101, x)		 // _111111 = x6
	 156  	p256OrdSqr(t, x, 2)					// _11111100
	 157  	p256OrdMul(t, t, _11)				// _11111111 = x8
	 158  	p256OrdSqr(x, t, 8)					// _ff00
	 159  	p256OrdMul(x, x, t)					// _ffff = x16
	 160  	p256OrdSqr(t, x, 16)				 // _ffff0000
	 161  	p256OrdMul(t, t, x)					// _ffffffff = x32
	 162  
	 163  	p256OrdSqr(x, t, 64)
	 164  	p256OrdMul(x, x, t)
	 165  	p256OrdSqr(x, x, 32)
	 166  	p256OrdMul(x, x, t)
	 167  
	 168  	sqrs := []uint8{
	 169  		6, 5, 4, 5, 5,
	 170  		4, 3, 3, 5, 9,
	 171  		6, 2, 5, 6, 5,
	 172  		4, 5, 5, 3, 10,
	 173  		2, 5, 5, 3, 7, 6}
	 174  	muls := [][]uint64{
	 175  		_101111, _111, _11, _1111, _10101,
	 176  		_101, _101, _101, _111, _101111,
	 177  		_1111, _1, _1, _1111, _111,
	 178  		_111, _111, _101, _11, _101111,
	 179  		_11, _11, _11, _1, _10101, _1111}
	 180  
	 181  	for i, s := range sqrs {
	 182  		p256OrdSqr(x, x, int(s))
	 183  		p256OrdMul(x, x, muls[i])
	 184  	}
	 185  
	 186  	// Multiplying by one in the Montgomery domain converts a Montgomery
	 187  	// value out of the domain.
	 188  	one := []uint64{1, 0, 0, 0}
	 189  	p256OrdMul(x, x, one)
	 190  
	 191  	xOut := make([]byte, 32)
	 192  	p256LittleToBig(xOut, x)
	 193  	return new(big.Int).SetBytes(xOut)
	 194  }
	 195  
	 196  // fromBig converts a *big.Int into a format used by this code.
	 197  func fromBig(out []uint64, big *big.Int) {
	 198  	for i := range out {
	 199  		out[i] = 0
	 200  	}
	 201  
	 202  	for i, v := range big.Bits() {
	 203  		out[i] = uint64(v)
	 204  	}
	 205  }
	 206  
	 207  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
	 208  // to out. If the scalar is equal or greater than the order of the group, it's
	 209  // reduced modulo that order.
	 210  func p256GetScalar(out []uint64, in []byte) {
	 211  	n := new(big.Int).SetBytes(in)
	 212  
	 213  	if n.Cmp(p256.N) >= 0 {
	 214  		n.Mod(n, p256.N)
	 215  	}
	 216  	fromBig(out, n)
	 217  }
	 218  
	 219  // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
	 220  // prime of the underlying field of the curve. (See initP256 for the value.)
	 221  // Thus rr here is R×R mod p: multiplying a value by rr with p256Mul converts
	      // it into the Montgomery domain. See the comment in Inverse for the details.
	 222  var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
	 223  
	 224  func maybeReduceModP(in *big.Int) *big.Int {
	 225  	if in.Cmp(p256.P) < 0 {
	 226  		return in
	 227  	}
	 228  	return new(big.Int).Mod(in, p256.P)
	 229  }
	 230  
	      // CombinedMult computes the sum of two scalar multiplications and returns it
	      // in affine form: r1 is p256BaseMult applied to baseScalar, r2 is the point
	      // (bigX, bigY) multiplied by scalar. Both scalars are big-endian byte strings
	      // and are reduced modulo the group order by p256GetScalar.
	 231  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
	 232  	scalarReversed := make([]uint64, 4)
	 233  	var r1, r2 p256Point
	 234  	p256GetScalar(scalarReversed, baseScalar)
	 235  	r1IsInfinity := scalarIsZero(scalarReversed)
	 236  	r1.p256BaseMult(scalarReversed)
	 237  
	      	// scalarReversed is reused for the second scalar from here on.
	 238  	p256GetScalar(scalarReversed, scalar)
	 239  	r2IsInfinity := scalarIsZero(scalarReversed)
	      	// Load the (reduced) affine coordinates and convert them into the
	      	// Montgomery domain by multiplying with rr.
	 240  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
	 241  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
	 242  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
	 243  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
	 244  
	 245  	// This sets r2's Z value to 1, in the Montgomery domain.
	 246  	r2.xyz[8] = 0x0000000000000001
	 247  	r2.xyz[9] = 0xffffffff00000000
	 248  	r2.xyz[10] = 0xffffffffffffffff
	 249  	r2.xyz[11] = 0x00000000fffffffe
	 250  
	 251  	r2.p256ScalarMult(scalarReversed)
	 252  
	      	// Compute both r1+r2 and 2*r1 unconditionally, then pick the right
	      	// result with conditional copies instead of branches: the doubling when
	      	// the addition reported equal inputs, r1 when the second scalar was
	      	// zero, and r2 when the first scalar was zero. Later copies take
	      	// precedence over earlier ones.
	 253  	var sum, double p256Point
	 254  	pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
	 255  	p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
	 256  	sum.CopyConditional(&double, pointsEqual)
	 257  	sum.CopyConditional(&r1, r2IsInfinity)
	 258  	sum.CopyConditional(&r2, r1IsInfinity)
	 259  
	 260  	return sum.p256PointToAffine()
	 261  }
	 262  
	 263  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
	 264  	scalarReversed := make([]uint64, 4)
	 265  	p256GetScalar(scalarReversed, scalar)
	 266  
	 267  	var r p256Point
	 268  	r.p256BaseMult(scalarReversed)
	 269  	return r.p256PointToAffine()
	 270  }
	 271  
	      // ScalarMult multiplies the point (bigX, bigY) by the big-endian scalar and
	      // returns the result in affine coordinates. The scalar is reduced modulo the
	      // group order by p256GetScalar, and coordinates that are not below P are
	      // reduced by maybeReduceModP.
	 272  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
	 273  	scalarReversed := make([]uint64, 4)
	 274  	p256GetScalar(scalarReversed, scalar)
	 275  
	 276  	var r p256Point
	      	// Load the coordinates and convert them into the Montgomery domain by
	      	// multiplying with rr.
	 277  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
	 278  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
	 279  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
	 280  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
	 281  	// This sets r's Z value to 1, in the Montgomery domain.
	 282  	r.xyz[8] = 0x0000000000000001
	 283  	r.xyz[9] = 0xffffffff00000000
	 284  	r.xyz[10] = 0xffffffffffffffff
	 285  	r.xyz[11] = 0x00000000fffffffe
	 286  
	 287  	r.p256ScalarMult(scalarReversed)
	 288  	return r.p256PointToAffine()
	 289  }
	 290  
	 291  // uint64IsZero returns 1 if x is zero and zero otherwise.
	 292  func uint64IsZero(x uint64) int {
	 293  	x = ^x
	 294  	x &= x >> 32
	 295  	x &= x >> 16
	 296  	x &= x >> 8
	 297  	x &= x >> 4
	 298  	x &= x >> 2
	 299  	x &= x >> 1
	 300  	return int(x & 1)
	 301  }
	 302  
	 303  // scalarIsZero returns 1 if scalar represents the zero value, and zero
	 304  // otherwise.
	 305  func scalarIsZero(scalar []uint64) int {
	 306  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
	 307  }
	 308  
	      // p256PointToAffine converts p to affine coordinates and returns them as
	      // big.Ints: x = X × Z^-2 and y = Y × Z^-3, where X, Y and Z are xyz[0:4],
	      // xyz[4:8] and xyz[8:12]. p itself is not modified.
	 309  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
	 310  	zInv := make([]uint64, 4)
	 311  	zInvSq := make([]uint64, 4)
	      	// zInv = Z^-1, zInvSq = Z^-2, and then zInv is overwritten with Z^-3.
	 312  	p256Inverse(zInv, p.xyz[8:12])
	 313  	p256Sqr(zInvSq, zInv, 1)
	 314  	p256Mul(zInv, zInv, zInvSq)
	 315  
	      	// From here on zInvSq holds the x result and zInv holds the y result.
	 316  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
	 317  	p256Mul(zInv, p.xyz[4:8], zInv)
	 318  
	      	// Leave the Montgomery domain.
	 319  	p256FromMont(zInvSq, zInvSq)
	 320  	p256FromMont(zInv, zInv)
	 321  
	      	// Swap the 64-bit words into byte strings that SetBytes can consume.
	 322  	xOut := make([]byte, 32)
	 323  	yOut := make([]byte, 32)
	 324  	p256LittleToBig(xOut, zInvSq)
	 325  	p256LittleToBig(yOut, zInv)
	 326  
	 327  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
	 328  }
	 329  
	 330  // CopyConditional copies overwrites p with src if v == 1, and leaves p
	 331  // unchanged if v == 0.
	 332  func (p *p256Point) CopyConditional(src *p256Point, v int) {
	 333  	pMask := uint64(v) - 1
	 334  	srcMask := ^pMask
	 335  
	 336  	for i, n := range p.xyz {
	 337  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
	 338  	}
	 339  }
	 340  
	 341  // p256Inverse sets out to in^-1 mod p.
	      //
	      // It raises in to a fixed exponent with a chain of p256Sqr and p256Mul
	      // calls. The exponent assembled by the chain is
	      // 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd;
	      // presumably this is p-2 for the P-256 prime (TODO confirm against initP256).
	      // The trailing comments give the exponent of in held by each intermediate.
	 342  func p256Inverse(out, in []uint64) {
	      	// Scratch space for the intermediate powers; only the first five
	      	// 4-word slots are used.
	 343  	var stack [6 * 4]uint64
	 344  	p2 := stack[4*0 : 4*0+4]
	 345  	p4 := stack[4*1 : 4*1+4]
	 346  	p8 := stack[4*2 : 4*2+4]
	 347  	p16 := stack[4*3 : 4*3+4]
	 348  	p32 := stack[4*4 : 4*4+4]
	 349  
	 350  	p256Sqr(out, in, 1)
	 351  	p256Mul(p2, out, in) // in^(2^2 - 1) = in^0x3
	 352  
	 353  	p256Sqr(out, p2, 2)
	 354  	p256Mul(p4, out, p2) // in^(2^4 - 1) = in^0xf
	 355  
	 356  	p256Sqr(out, p4, 4)
	 357  	p256Mul(p8, out, p4) // in^(2^8 - 1) = in^0xff
	 358  
	 359  	p256Sqr(out, p8, 8)
	 360  	p256Mul(p16, out, p8) // in^(2^16 - 1) = in^0xffff
	 361  
	 362  	p256Sqr(out, p16, 16)
	 363  	p256Mul(p32, out, p16) // in^(2^32 - 1) = in^0xffffffff
	 364  
	      	// Exponent so far: 0xffffffff00000001.
	 365  	p256Sqr(out, p32, 32)
	 366  	p256Mul(out, out, in)
	 367  
	      	// Shift the exponent left by 128 bits and append 0xffffffff.
	 368  	p256Sqr(out, out, 128)
	 369  	p256Mul(out, out, p32)
	 370  
	      	// Append another 0xffffffff.
	 371  	p256Sqr(out, out, 32)
	 372  	p256Mul(out, out, p32)
	 373  
	      	// The remaining steps append the low 32 bits 0xfffffffd as the bit
	      	// groups ffff, ff, f, 11 and 01.
	 374  	p256Sqr(out, out, 16)
	 375  	p256Mul(out, out, p16)
	 376  
	 377  	p256Sqr(out, out, 8)
	 378  	p256Mul(out, out, p8)
	 379  
	 380  	p256Sqr(out, out, 4)
	 381  	p256Mul(out, out, p4)
	 382  
	 383  	p256Sqr(out, out, 2)
	 384  	p256Mul(out, out, p2)
	 385  
	 386  	p256Sqr(out, out, 2)
	 387  	p256Mul(out, out, in)
	 388  }
	 389  
	      // p256StorePoint copies the twelve words of p into r starting at offset
	      // index*12, i.e. into entry index of a table of sixteen 12-word points.
	 390  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
	 391  	copy(r[index*12:], p.xyz[:])
	 392  }
	 393  
	 394  func boothW5(in uint) (int, int) {
	 395  	var s uint = ^((in >> 5) - 1)
	 396  	var d uint = (1 << 6) - in - 1
	 397  	d = (d & s) | (in & (^s))
	 398  	d = (d >> 1) + (d & 1)
	 399  	return int(d), int(s & 1)
	 400  }
	 401  
	 402  func boothW6(in uint) (int, int) {
	 403  	var s uint = ^((in >> 6) - 1)
	 404  	var d uint = (1 << 7) - in - 1
	 405  	d = (d & s) | (in & (^s))
	 406  	d = (d >> 1) + (d & 1)
	 407  	return int(d), int(s & 1)
	 408  }
	 409  
	      // p256BaseMult sets p to the result of a fixed-table scalar multiplication.
	      // scalar holds four 64-bit words, least-significant word first. It is scanned
	      // from the low bits upwards in 43 overlapping 7-bit windows that advance 6
	      // bits at a time; each window is recoded by boothW6 and used to select an
	      // entry from the matching row of p256Precomputed, which is then added to p.
	      // NOTE(review): p256Precomputed is defined outside this file; presumably it
	      // holds multiples of the curve's base point — confirm.
	 410  func (p *p256Point) p256BaseMult(scalar []uint64) {
	      	// The first window has an implicit zero bit below bit 0 of the scalar.
	 411  	wvalue := (scalar[0] << 1) & 0x7f
	 412  	sel, sign := boothW6(uint(wvalue))
	 413  	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
	 414  	p256NegCond(p.xyz[4:8], sign)
	 415  
	 416  	// (This is one, in the Montgomery domain.)
	 417  	p.xyz[8] = 0x0000000000000001
	 418  	p.xyz[9] = 0xffffffff00000000
	 419  	p.xyz[10] = 0xffffffffffffffff
	 420  	p.xyz[11] = 0x00000000fffffffe
	 421  
	 422  	var t0 p256Point
	 423  	// (This is one, in the Montgomery domain.)
	 424  	t0.xyz[8] = 0x0000000000000001
	 425  	t0.xyz[9] = 0xffffffff00000000
	 426  	t0.xyz[10] = 0xffffffffffffffff
	 427  	t0.xyz[11] = 0x00000000fffffffe
	 428  
	      	// index is the bit position of the lowest bit of the next window. zero
	      	// stays 0 until some window has produced a non-zero sel.
	 429  	index := uint(5)
	 430  	zero := sel
	 431  
	 432  	for i := 1; i < 43; i++ {
	      		// Below bit 192 a window may straddle two words, so bits from the
	      		// next word are merged in; in the top word there is no next word.
	 433  		if index < 192 {
	 434  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
	 435  		} else {
	 436  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
	 437  		}
	 438  		index += 6
	 439  		sel, sign = boothW6(uint(wvalue))
	 440  		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
	      		// The sign, sel and zero flags let the assembly negate the addend,
	      		// keep p unchanged, or take the addend as the result.
	 441  		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
	 442  		zero |= sel
	 443  	}
	 444  }
	 445  
	      // p256ScalarMult replaces p with scalar × p. scalar holds four 64-bit words,
	      // least-significant word first. A table of the multiples 1×p to 16×p is built
	      // first; the scalar is then scanned from the top bits downwards in overlapping
	      // 6-bit windows that advance 5 bits at a time, each recoded by boothW5 into a
	      // table index and a sign.
	 446  func (p *p256Point) p256ScalarMult(scalar []uint64) {
	 447  	// precomp is a table of precomputed points that stores the multiples
	 448  	// of p from 1×p to 16×p: entry i holds (i+1)×p.
	 449  	var precomp [16 * 4 * 3]uint64
	 450  	var t0, t1, t2, t3 p256Point
	 451  
	 452  	// Prepare the table. The trailing comments give the multiple of p stored.
	 453  	p.p256StorePoint(&precomp, 0) // 1
	 454  
	 455  	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
	 456  	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	 457  	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	 458  	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	 459  	t0.p256StorePoint(&precomp, 1)	// 2
	 460  	t1.p256StorePoint(&precomp, 3)	// 4
	 461  	t2.p256StorePoint(&precomp, 7)	// 8
	 462  	t3.p256StorePoint(&precomp, 15) // 16
	 463  
	 464  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	 465  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	 466  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	 467  	t0.p256StorePoint(&precomp, 2) // 3
	 468  	t1.p256StorePoint(&precomp, 4) // 5
	 469  	t2.p256StorePoint(&precomp, 8) // 9
	 470  
	 471  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	 472  	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	 473  	t0.p256StorePoint(&precomp, 5) // 6
	 474  	t1.p256StorePoint(&precomp, 9) // 10
	 475  
	 476  	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	 477  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	 478  	t2.p256StorePoint(&precomp, 6)	// 7
	 479  	t1.p256StorePoint(&precomp, 10) // 11
	 480  
	 481  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	 482  	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	 483  	t0.p256StorePoint(&precomp, 11) // 12
	 484  	t2.p256StorePoint(&precomp, 13) // 14
	 485  
	 486  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	 487  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	 488  	t0.p256StorePoint(&precomp, 12) // 13
	 489  	t2.p256StorePoint(&precomp, 14) // 15
	 490  
	 491  	// Start scanning the window from the top bit. index is the bit position
	      	// of the lowest bit of the current window.
	 492  	index := uint(254)
	 493  	var sel, sign int
	 494  
	      	// Only bits 254 and 255 exist in the topmost window, so the sign
	      	// returned by boothW5 is discarded here.
	 495  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	 496  	sel, _ = boothW5(uint(wvalue))
	 497  
	 498  	p256Select(p.xyz[0:12], precomp[0:], sel)
	      	// zero stays 0 until some window has produced a non-zero sel.
	 499  	zero := sel
	 500  
	 501  	for index > 4 {
	 502  		index -= 5
	      		// Multiply the accumulator by 2^5 before adding the next window.
	 503  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 504  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 505  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 506  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 507  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 508  
	      		// Below bit 192 a window may straddle two words, so bits from the
	      		// next word are merged in; in the top word there is no next word.
	 509  		if index < 192 {
	 510  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
	 511  		} else {
	 512  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
	 513  		}
	 514  
	 515  		sel, sign = boothW5(uint(wvalue))
	 516  
	      		// t0 = ±(table entry), t1 = p + t0. The two conditional moves then
	      		// keep p when sel == 0, and take t0 directly when zero == 0.
	 517  		p256Select(t0.xyz[0:], precomp[0:], sel)
	 518  		p256NegCond(t0.xyz[4:8], sign)
	 519  		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	 520  		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	 521  		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
	 522  		zero |= sel
	 523  	}
	 524  
	      	// Final window: five more doublings, then the lowest bits of the scalar
	      	// with an implicit zero bit below bit 0.
	 525  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 526  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 527  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 528  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 529  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	 530  
	 531  	wvalue = (scalar[0] << 1) & 0x3f
	 532  	sel, sign = boothW5(uint(wvalue))
	 533  
	 534  	p256Select(t0.xyz[0:], precomp[0:], sel)
	 535  	p256NegCond(t0.xyz[4:8], sign)
	 536  	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	 537  	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	 538  	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
	 539  }
	 540
View as plain text