# Source file src/math/big/nat.go

```     1  // Copyright 2009 The Go Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style
4
5  // This file implements unsigned multi-precision integers (natural
6  // numbers). They are the building blocks for the implementation
7  // of signed integers, rationals, and floating-point numbers.
8  //
9  // Caution: This implementation relies on the function "alias"
10  //          which assumes that (nat) slice capacities are never
11  //          changed (no 3-operand slice expressions). If that
12  //          changes, alias needs to be updated for correctness.
13
14  package big
15
16  import (
17  	"encoding/binary"
18  	"math/bits"
19  	"math/rand"
20  	"sync"
21  )
22
23  // An unsigned integer x of the form
24  //
25  //	x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
26  //
27  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
28  // with the digits x[i] as the slice elements.
29  //
30  // A number is normalized if the slice contains no leading 0 digits.
31  // During arithmetic operations, denormalized values may occur but are
32  // always normalized before returning the final result. The normalized
33  // representation of 0 is the empty or nil slice (length = 0).
34  type nat []Word
35
36  var (
37  	natOne  = nat{1}
38  	natTwo  = nat{2}
39  	natFive = nat{5}
40  	natTen  = nat{10}
41  )
42
43  func (z nat) String() string {
44  	return "0x" + string(z.itoa(false, 16))
45  }
46
47  func (z nat) clear() {
48  	for i := range z {
49  		z[i] = 0
50  	}
51  }
52
53  func (z nat) norm() nat {
54  	i := len(z)
55  	for i > 0 && z[i-1] == 0 {
56  		i--
57  	}
58  	return z[0:i]
59  }
60
61  func (z nat) make(n int) nat {
62  	if n <= cap(z) {
63  		return z[:n] // reuse z
64  	}
65  	if n == 1 {
66  		// Most nats start small and stay that way; don't over-allocate.
67  		return make(nat, 1)
68  	}
69  	// Choosing a good value for e has significant performance impact
70  	// because it increases the chance that a value can be reused.
71  	const e = 4 // extra capacity
72  	return make(nat, n, n+e)
73  }
74
75  func (z nat) setWord(x Word) nat {
76  	if x == 0 {
77  		return z[:0]
78  	}
79  	z = z.make(1)
80  	z[0] = x
81  	return z
82  }
83
84  func (z nat) setUint64(x uint64) nat {
85  	// single-word value
86  	if w := Word(x); uint64(w) == x {
87  		return z.setWord(w)
88  	}
89  	// 2-word value
90  	z = z.make(2)
91  	z[1] = Word(x >> 32)
92  	z[0] = Word(x)
93  	return z
94  }
95
96  func (z nat) set(x nat) nat {
97  	z = z.make(len(x))
98  	copy(z, x)
99  	return z
100  }
101
102  func (z nat) add(x, y nat) nat {
103  	m := len(x)
104  	n := len(y)
105
106  	switch {
107  	case m < n:
109  	case m == 0:
110  		// n == 0 because m >= n; result is 0
111  		return z[:0]
112  	case n == 0:
113  		// result is x
114  		return z.set(x)
115  	}
116  	// m > 0
117
118  	z = z.make(m + 1)
119  	c := addVV(z[0:n], x, y)
120  	if m > n {
121  		c = addVW(z[n:m], x[n:], c)
122  	}
123  	z[m] = c
124
125  	return z.norm()
126  }
127
128  func (z nat) sub(x, y nat) nat {
129  	m := len(x)
130  	n := len(y)
131
132  	switch {
133  	case m < n:
134  		panic("underflow")
135  	case m == 0:
136  		// n == 0 because m >= n; result is 0
137  		return z[:0]
138  	case n == 0:
139  		// result is x
140  		return z.set(x)
141  	}
142  	// m > 0
143
144  	z = z.make(m)
145  	c := subVV(z[0:n], x, y)
146  	if m > n {
147  		c = subVW(z[n:], x[n:], c)
148  	}
149  	if c != 0 {
150  		panic("underflow")
151  	}
152
153  	return z.norm()
154  }
155
156  func (x nat) cmp(y nat) (r int) {
157  	m := len(x)
158  	n := len(y)
159  	if m != n || m == 0 {
160  		switch {
161  		case m < n:
162  			r = -1
163  		case m > n:
164  			r = 1
165  		}
166  		return
167  	}
168
169  	i := m - 1
170  	for i > 0 && x[i] == y[i] {
171  		i--
172  	}
173
174  	switch {
175  	case x[i] < y[i]:
176  		r = -1
177  	case x[i] > y[i]:
178  		r = 1
179  	}
180  	return
181  }
182
183  func (z nat) mulAddWW(x nat, y, r Word) nat {
184  	m := len(x)
185  	if m == 0 || y == 0 {
186  		return z.setWord(r) // result is r
187  	}
188  	// m > 0
189
190  	z = z.make(m + 1)
191  	z[m] = mulAddVWW(z[0:m], x, y, r)
192
193  	return z.norm()
194  }
195
196  // basicMul multiplies x and y and leaves the result in z.
197  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
198  func basicMul(z, x, y nat) {
199  	z[0 : len(x)+len(y)].clear() // initialize z
200  	for i, d := range y {
201  		if d != 0 {
202  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
203  		}
204  	}
205  }
206
207  // montgomery computes z mod m = x*y*2**(-n*_W) mod m,
208  // assuming k = -1/m mod 2**_W.
209  // z is used for storing the result which is returned;
210  // z must not alias x, y or m.
211  // See Gueron, "Efficient Software Implementations of Modular Exponentiation".
212  // https://eprint.iacr.org/2011/239.pdf
213  // In the terminology of that paper, this is an "Almost Montgomery Multiplication":
214  // x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
215  // z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
216  func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
217  	// This code assumes x, y, m are all the same length, n.
218  	// (required by addMulVVW and the for loop).
219  	// It also assumes that x, y are already reduced mod m,
220  	// or else the result will not be properly reduced.
221  	if len(x) != n || len(y) != n || len(m) != n {
222  		panic("math/big: mismatched montgomery number lengths")
223  	}
224  	z = z.make(n * 2)
225  	z.clear()
226  	var c Word
227  	for i := 0; i < n; i++ {
228  		d := y[i]
229  		c2 := addMulVVW(z[i:n+i], x, d)
230  		t := z[i] * k
231  		c3 := addMulVVW(z[i:n+i], m, t)
232  		cx := c + c2
233  		cy := cx + c3
234  		z[n+i] = cy
235  		if cx < c2 || cy < c3 {
236  			c = 1
237  		} else {
238  			c = 0
239  		}
240  	}
241  	if c != 0 {
242  		subVV(z[:n], z[n:], m)
243  	} else {
244  		copy(z[:n], z[n:])
245  	}
246  	return z[:n]
247  }
248
249  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
250  // Factored out for readability - do not use outside karatsuba.
251  func karatsubaAdd(z, x nat, n int) {
252  	if c := addVV(z[0:n], z, x); c != 0 {
254  	}
255  }
256
257  // Like karatsubaAdd, but does subtract.
258  func karatsubaSub(z, x nat, n int) {
259  	if c := subVV(z[0:n], z, x); c != 0 {
260  		subVW(z[n:n+n>>1], z[n:], c)
261  	}
262  }
263
264  // Operands that are shorter than karatsubaThreshold are multiplied using
265  // "grade school" multiplication; for longer operands the Karatsuba algorithm
266  // is used.
267  var karatsubaThreshold = 40 // computed by calibrate_test.go
268
269  // karatsuba multiplies x and y and leaves the result in z.
270  // Both x and y must have the same length n and n must be a
271  // power of 2. The result vector z must have len(z) >= 6*n.
272  // The (non-normalized) result is placed in z[0 : 2*n].
273  func karatsuba(z, x, y nat) {
274  	n := len(y)
275
276  	// Switch to basic multiplication if numbers are odd or small.
277  	// (n is always even if karatsubaThreshold is even, but be
278  	// conservative)
279  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
280  		basicMul(z, x, y)
281  		return
282  	}
283  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
284
285  	// Karatsuba multiplication is based on the observation that
286  	// for two numbers x and y with:
287  	//
288  	//   x = x1*b + x0
289  	//   y = y1*b + y0
290  	//
291  	// the product x*y can be obtained with 3 products z2, z1, z0
293  	//
294  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
295  	//       =    z2*b*b +              z1*b +    z0
296  	//
297  	// with:
298  	//
299  	//   xd = x1 - x0
300  	//   yd = y0 - y1
301  	//
302  	//   z1 =      xd*yd                    + z2 + z0
303  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
304  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
305  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
306  	//      = x1*y0                 + x0*y1
307
308  	// split x, y into "digits"
309  	n2 := n >> 1              // n2 >= 1
310  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
311  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
312
313  	// z is used for the result and temporary storage:
314  	//
315  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
316  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
317  	//
318  	// For each recursive call of karatsuba, an unused slice of
319  	// z is passed in that has (at least) half the length of the
320  	// caller's z.
321
322  	// compute z0 and z2 with the result "in place" in z
323  	karatsuba(z, x0, y0)     // z0 = x0*y0
324  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
325
326  	// compute xd (or the negative value if underflow occurs)
327  	s := 1 // sign of product xd*yd
328  	xd := z[2*n : 2*n+n2]
329  	if subVV(xd, x1, x0) != 0 { // x1-x0
330  		s = -s
331  		subVV(xd, x0, x1) // x0-x1
332  	}
333
334  	// compute yd (or the negative value if underflow occurs)
335  	yd := z[2*n+n2 : 3*n]
336  	if subVV(yd, y0, y1) != 0 { // y0-y1
337  		s = -s
338  		subVV(yd, y1, y0) // y1-y0
339  	}
340
341  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
342  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
343  	p := z[n*3:]
344  	karatsuba(p, xd, yd)
345
346  	// save original z2:z0
347  	// (ok to use upper half of z since we're done recurring)
348  	r := z[n*4:]
349  	copy(r, z[:n*2])
350
351  	// add up all partial products
352  	//
353  	//   2*n     n     0
354  	// z = [ z2  | z0  ]
355  	//   +    [ z0  ]
356  	//   +    [ z2  ]
357  	//   +    [  p  ]
358  	//
361  	if s > 0 {
363  	} else {
364  		karatsubaSub(z[n2:], p, n)
365  	}
366  }
367
368  // alias reports whether x and y share the same base array.
369  //
370  // Note: alias assumes that the capacity of underlying arrays
371  // is never changed for nat values; i.e. that there are
372  // no 3-operand slice expressions in this code (or worse,
373  // reflect-based operations to the same effect).
374  func alias(x, y nat) bool {
375  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
376  }
377
378  // addAt implements z += x<<(_W*i); z must be long enough.
379  // (we don't use nat.add because we need z to stay the same
380  // slice, and we don't need to normalize z after each addition)
381  func addAt(z, x nat, i int) {
382  	if n := len(x); n > 0 {
383  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
384  			j := i + n
385  			if j < len(z) {
387  			}
388  		}
389  	}
390  }
391
392  // karatsubaLen computes an approximation to the maximum k <= n such that
393  // k = p<<i for a number p <= threshold and an i >= 0. Thus, the
394  // result is the largest number that can be divided repeatedly by 2 before
395  // becoming about the value of threshold.
396  func karatsubaLen(n, threshold int) int {
397  	i := uint(0)
398  	for n > threshold {
399  		n >>= 1
400  		i++
401  	}
402  	return n << i
403  }
404
405  func (z nat) mul(x, y nat) nat {
406  	m := len(x)
407  	n := len(y)
408
409  	switch {
410  	case m < n:
411  		return z.mul(y, x)
412  	case m == 0 || n == 0:
413  		return z[:0]
414  	case n == 1:
416  	}
417  	// m >= n > 1
418
419  	// determine if z can be reused
420  	if alias(z, x) || alias(z, y) {
421  		z = nil // z is an alias for x or y - cannot reuse
422  	}
423
424  	// use basic multiplication if the numbers are small
425  	if n < karatsubaThreshold {
426  		z = z.make(m + n)
427  		basicMul(z, x, y)
428  		return z.norm()
429  	}
430  	// m >= n && n >= karatsubaThreshold && n >= 2
431
432  	// determine Karatsuba length k such that
433  	//
434  	//   x = xh*b + x0  (0 <= x0 < b)
435  	//   y = yh*b + y0  (0 <= y0 < b)
436  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
437  	//
438  	k := karatsubaLen(n, karatsubaThreshold)
439  	// k <= n
440
441  	// multiply x0 and y0 via Karatsuba
442  	x0 := x[0:k]              // x0 is not normalized
443  	y0 := y[0:k]              // y0 is not normalized
444  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
445  	karatsuba(z, x0, y0)
446  	z = z[0 : m+n]  // z has final length but may be incomplete
447  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
448
449  	// If xh != 0 or yh != 0, add the missing terms to z. For
450  	//
451  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
452  	//   yh =                         y1*b (0 <= y1 < b)
453  	//
454  	// the missing terms are
455  	//
456  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
457  	//
458  	// since all the yi for i > 1 are 0 by choice of k: If any of them
459  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
460  	// be a larger valid threshold contradicting the assumption about k.
461  	//
462  	if k < n || m != n {
463  		tp := getNat(3 * k)
464  		t := *tp
465
467  		x0 := x0.norm()
468  		y1 := y[k:]       // y1 is normalized because y is
469  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
471
473  		y0 := y0.norm()
474  		for i := k; i < len(x); i += k {
475  			xi := x[i:]
476  			if len(xi) > k {
477  				xi = xi[:k]
478  			}
479  			xi = xi.norm()
480  			t = t.mul(xi, y0)
482  			t = t.mul(xi, y1)
484  		}
485
486  		putNat(tp)
487  	}
488
489  	return z.norm()
490  }
491
492  // basicSqr sets z = x*x and is asymptotically faster than basicMul
493  // by about a factor of 2, but slower for small arguments due to overhead.
494  // Requirements: len(x) > 0, len(z) == 2*len(x)
495  // The (non-normalized) result is placed in z.
496  func basicSqr(z, x nat) {
497  	n := len(x)
498  	tp := getNat(2 * n)
499  	t := *tp // temporary variable to hold the products
500  	t.clear()
501  	z[1], z[0] = mulWW(x[0], x[0]) // the initial square
502  	for i := 1; i < n; i++ {
503  		d := x[i]
504  		// z collects the squares x[i] * x[i]
505  		z[2*i+1], z[2*i] = mulWW(d, d)
506  		// t collects the products x[i] * x[j] where j < i
507  		t[2*i] = addMulVVW(t[i:2*i], x[0:i], d)
508  	}
509  	t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
510  	addVV(z, z, t)                              // combine the result
511  	putNat(tp)
512  }
513
514  // karatsubaSqr squares x and leaves the result in z.
515  // len(x) must be a power of 2 and len(z) >= 6*len(x).
516  // The (non-normalized) result is placed in z[0 : 2*len(x)].
517  //
518  // The algorithm and the layout of z are the same as for karatsuba.
519  func karatsubaSqr(z, x nat) {
520  	n := len(x)
521
522  	if n&1 != 0 || n < karatsubaSqrThreshold || n < 2 {
523  		basicSqr(z[:2*n], x)
524  		return
525  	}
526
527  	n2 := n >> 1
528  	x1, x0 := x[n2:], x[0:n2]
529
530  	karatsubaSqr(z, x0)
531  	karatsubaSqr(z[n:], x1)
532
533  	// s = sign(xd*yd) == -1 for xd != 0; s == 1 for xd == 0
534  	xd := z[2*n : 2*n+n2]
535  	if subVV(xd, x1, x0) != 0 {
536  		subVV(xd, x0, x1)
537  	}
538
539  	p := z[n*3:]
540  	karatsubaSqr(p, xd)
541
542  	r := z[n*4:]
543  	copy(r, z[:n*2])
544
547  	karatsubaSub(z[n2:], p, n) // s == -1 for p != 0; s == 1 for p == 0
548  }
549
550  // Operands that are shorter than basicSqrThreshold are squared using
551  // "grade school" multiplication; for operands longer than karatsubaSqrThreshold
552  // we use the Karatsuba algorithm optimized for x == y.
553  var basicSqrThreshold = 20      // computed by calibrate_test.go
554  var karatsubaSqrThreshold = 260 // computed by calibrate_test.go
555
556  // z = x*x
557  func (z nat) sqr(x nat) nat {
558  	n := len(x)
559  	switch {
560  	case n == 0:
561  		return z[:0]
562  	case n == 1:
563  		d := x[0]
564  		z = z.make(2)
565  		z[1], z[0] = mulWW(d, d)
566  		return z.norm()
567  	}
568
569  	if alias(z, x) {
570  		z = nil // z is an alias for x - cannot reuse
571  	}
572
573  	if n < basicSqrThreshold {
574  		z = z.make(2 * n)
575  		basicMul(z, x, x)
576  		return z.norm()
577  	}
578  	if n < karatsubaSqrThreshold {
579  		z = z.make(2 * n)
580  		basicSqr(z, x)
581  		return z.norm()
582  	}
583
584  	// Use Karatsuba multiplication optimized for x == y.
585  	// The algorithm and layout of z are the same as for mul.
586
587  	// z = (x1*b + x0)^2 = x1^2*b^2 + 2*x1*x0*b + x0^2
588
589  	k := karatsubaLen(n, karatsubaSqrThreshold)
590
591  	x0 := x[0:k]
592  	z = z.make(max(6*k, 2*n))
593  	karatsubaSqr(z, x0) // z = x0^2
594  	z = z[0 : 2*n]
595  	z[2*k:].clear()
596
597  	if k < n {
598  		tp := getNat(2 * k)
599  		t := *tp
600  		x0 := x0.norm()
601  		x1 := x[k:]
602  		t = t.mul(x0, x1)
604  		addAt(z, t, k) // z = 2*x1*x0*b + x0^2
605  		t = t.sqr(x1)
606  		addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2
607  		putNat(tp)
608  	}
609
610  	return z.norm()
611  }
612
613  // mulRange computes the product of all the unsigned integers in the
614  // range [a, b] inclusively. If a > b (empty range), the result is 1.
615  func (z nat) mulRange(a, b uint64) nat {
616  	switch {
617  	case a == 0:
618  		// cut long ranges short (optimization)
619  		return z.setUint64(0)
620  	case a > b:
621  		return z.setUint64(1)
622  	case a == b:
623  		return z.setUint64(a)
624  	case a+1 == b:
625  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
626  	}
627  	m := a + (b-a)/2 // avoid overflow
628  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
629  }
630
631  // getNat returns a *nat of len n. The contents may not be zero.
632  // The pool holds *nat to avoid allocation when converting to interface{}.
633  func getNat(n int) *nat {
634  	var z *nat
635  	if v := natPool.Get(); v != nil {
636  		z = v.(*nat)
637  	}
638  	if z == nil {
639  		z = new(nat)
640  	}
641  	*z = z.make(n)
642  	if n > 0 {
643  		(*z)[0] = 0xfedcb // break code expecting zero
644  	}
645  	return z
646  }
647
648  func putNat(x *nat) {
649  	natPool.Put(x)
650  }
651
652  var natPool sync.Pool
653
654  // bitLen returns the length of x in bits.
655  // Unlike most methods, it works even if x is not normalized.
656  func (x nat) bitLen() int {
657  	// This function is used in cryptographic operations. It must not leak
658  	// anything but the Int's sign and bit size through side-channels. Any
659  	// changes must be reviewed by a security expert.
660  	if i := len(x) - 1; i >= 0 {
661  		// bits.Len uses a lookup table for the low-order bits on some
662  		// architectures. Neutralize any input-dependent behavior by setting all
663  		// bits after the first one bit.
664  		top := uint(x[i])
665  		top |= top >> 1
666  		top |= top >> 2
667  		top |= top >> 4
668  		top |= top >> 8
669  		top |= top >> 16
670  		top |= top >> 16 >> 16 // ">> 32" doesn't compile on 32-bit architectures
671  		return i*_W + bits.Len(top)
672  	}
673  	return 0
674  }
675
676  // trailingZeroBits returns the number of consecutive least significant zero
677  // bits of x.
678  func (x nat) trailingZeroBits() uint {
679  	if len(x) == 0 {
680  		return 0
681  	}
682  	var i uint
683  	for x[i] == 0 {
684  		i++
685  	}
686  	// x[i] != 0
687  	return i*_W + uint(bits.TrailingZeros(uint(x[i])))
688  }
689
690  // isPow2 returns i, true when x == 2**i and 0, false otherwise.
691  func (x nat) isPow2() (uint, bool) {
692  	var i uint
693  	for x[i] == 0 {
694  		i++
695  	}
696  	if i == uint(len(x))-1 && x[i]&(x[i]-1) == 0 {
697  		return i*_W + uint(bits.TrailingZeros(uint(x[i]))), true
698  	}
699  	return 0, false
700  }
701
702  func same(x, y nat) bool {
703  	return len(x) == len(y) && len(x) > 0 && &x[0] == &y[0]
704  }
705
706  // z = x << s
707  func (z nat) shl(x nat, s uint) nat {
708  	if s == 0 {
709  		if same(z, x) {
710  			return z
711  		}
712  		if !alias(z, x) {
713  			return z.set(x)
714  		}
715  	}
716
717  	m := len(x)
718  	if m == 0 {
719  		return z[:0]
720  	}
721  	// m > 0
722
723  	n := m + int(s/_W)
724  	z = z.make(n + 1)
725  	z[n] = shlVU(z[n-m:n], x, s%_W)
726  	z[0 : n-m].clear()
727
728  	return z.norm()
729  }
730
731  // z = x >> s
732  func (z nat) shr(x nat, s uint) nat {
733  	if s == 0 {
734  		if same(z, x) {
735  			return z
736  		}
737  		if !alias(z, x) {
738  			return z.set(x)
739  		}
740  	}
741
742  	m := len(x)
743  	n := m - int(s/_W)
744  	if n <= 0 {
745  		return z[:0]
746  	}
747  	// n > 0
748
749  	z = z.make(n)
750  	shrVU(z, x[m-n:], s%_W)
751
752  	return z.norm()
753  }
754
755  func (z nat) setBit(x nat, i uint, b uint) nat {
756  	j := int(i / _W)
757  	m := Word(1) << (i % _W)
758  	n := len(x)
759  	switch b {
760  	case 0:
761  		z = z.make(n)
762  		copy(z, x)
763  		if j >= n {
764  			// no need to grow
765  			return z
766  		}
767  		z[j] &^= m
768  		return z.norm()
769  	case 1:
770  		if j >= n {
771  			z = z.make(j + 1)
772  			z[n:].clear()
773  		} else {
774  			z = z.make(n)
775  		}
776  		copy(z, x)
777  		z[j] |= m
778  		// no need to normalize
779  		return z
780  	}
781  	panic("set bit is not 0 or 1")
782  }
783
784  // bit returns the value of the i'th bit, with lsb == bit 0.
785  func (x nat) bit(i uint) uint {
786  	j := i / _W
787  	if j >= uint(len(x)) {
788  		return 0
789  	}
790  	// 0 <= j < len(x)
791  	return uint(x[j] >> (i % _W) & 1)
792  }
793
794  // sticky returns 1 if there's a 1 bit within the
795  // i least significant bits, otherwise it returns 0.
796  func (x nat) sticky(i uint) uint {
797  	j := i / _W
798  	if j >= uint(len(x)) {
799  		if len(x) == 0 {
800  			return 0
801  		}
802  		return 1
803  	}
804  	// 0 <= j < len(x)
805  	for _, x := range x[:j] {
806  		if x != 0 {
807  			return 1
808  		}
809  	}
810  	if x[j]<<(_W-i%_W) != 0 {
811  		return 1
812  	}
813  	return 0
814  }
815
816  func (z nat) and(x, y nat) nat {
817  	m := len(x)
818  	n := len(y)
819  	if m > n {
820  		m = n
821  	}
822  	// m <= n
823
824  	z = z.make(m)
825  	for i := 0; i < m; i++ {
826  		z[i] = x[i] & y[i]
827  	}
828
829  	return z.norm()
830  }
831
832  // trunc returns z = x mod 2ⁿ.
833  func (z nat) trunc(x nat, n uint) nat {
834  	w := (n + _W - 1) / _W
835  	if uint(len(x)) < w {
836  		return z.set(x)
837  	}
838  	z = z.make(int(w))
839  	copy(z, x)
840  	if n%_W != 0 {
841  		z[len(z)-1] &= 1<<(n%_W) - 1
842  	}
843  	return z.norm()
844  }
845
846  func (z nat) andNot(x, y nat) nat {
847  	m := len(x)
848  	n := len(y)
849  	if n > m {
850  		n = m
851  	}
852  	// m >= n
853
854  	z = z.make(m)
855  	for i := 0; i < n; i++ {
856  		z[i] = x[i] &^ y[i]
857  	}
858  	copy(z[n:m], x[n:m])
859
860  	return z.norm()
861  }
862
863  func (z nat) or(x, y nat) nat {
864  	m := len(x)
865  	n := len(y)
866  	s := x
867  	if m < n {
868  		n, m = m, n
869  		s = y
870  	}
871  	// m >= n
872
873  	z = z.make(m)
874  	for i := 0; i < n; i++ {
875  		z[i] = x[i] | y[i]
876  	}
877  	copy(z[n:m], s[n:m])
878
879  	return z.norm()
880  }
881
882  func (z nat) xor(x, y nat) nat {
883  	m := len(x)
884  	n := len(y)
885  	s := x
886  	if m < n {
887  		n, m = m, n
888  		s = y
889  	}
890  	// m >= n
891
892  	z = z.make(m)
893  	for i := 0; i < n; i++ {
894  		z[i] = x[i] ^ y[i]
895  	}
896  	copy(z[n:m], s[n:m])
897
898  	return z.norm()
899  }
900
901  // random creates a random integer in [0..limit), using the space in z if
902  // possible. n is the bit length of limit.
903  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
904  	if alias(z, limit) {
905  		z = nil // z is an alias for limit - cannot reuse
906  	}
907  	z = z.make(len(limit))
908
909  	bitLengthOfMSW := uint(n % _W)
910  	if bitLengthOfMSW == 0 {
911  		bitLengthOfMSW = _W
912  	}
913  	mask := Word((1 << bitLengthOfMSW) - 1)
914
915  	for {
916  		switch _W {
917  		case 32:
918  			for i := range z {
919  				z[i] = Word(rand.Uint32())
920  			}
921  		case 64:
922  			for i := range z {
923  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
924  			}
925  		default:
926  			panic("unknown word size")
927  		}
929  		if z.cmp(limit) < 0 {
930  			break
931  		}
932  	}
933
934  	return z.norm()
935  }
936
937  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
938  // otherwise it sets z to x**y. The result is the value of z.
939  func (z nat) expNN(x, y, m nat, slow bool) nat {
940  	if alias(z, x) || alias(z, y) {
941  		// We cannot allow in-place modification of x or y.
942  		z = nil
943  	}
944
945  	// x**y mod 1 == 0
946  	if len(m) == 1 && m[0] == 1 {
947  		return z.setWord(0)
948  	}
949  	// m == 0 || m > 1
950
951  	// x**0 == 1
952  	if len(y) == 0 {
953  		return z.setWord(1)
954  	}
955  	// y > 0
956
957  	// 0**y = 0
958  	if len(x) == 0 {
959  		return z.setWord(0)
960  	}
961  	// x > 0
962
963  	// 1**y = 1
964  	if len(x) == 1 && x[0] == 1 {
965  		return z.setWord(1)
966  	}
967  	// x > 1
968
969  	// x**1 == x
970  	if len(y) == 1 && y[0] == 1 {
971  		if len(m) != 0 {
972  			return z.rem(x, m)
973  		}
974  		return z.set(x)
975  	}
976  	// y > 1
977
978  	if len(m) != 0 {
979  		// We likely end up being as long as the modulus.
980  		z = z.make(len(m))
981
982  		// If the exponent is large, we use the Montgomery method for odd values,
983  		// and a 4-bit, windowed exponentiation for powers of two,
984  		// and a CRT-decomposed Montgomery method for the remaining values
985  		// (even values times non-trivial odd values, which decompose into one
986  		// instance of each of the first two cases).
987  		if len(y) > 1 && !slow {
988  			if m[0]&1 == 1 {
989  				return z.expNNMontgomery(x, y, m)
990  			}
991  			if logM, ok := m.isPow2(); ok {
992  				return z.expNNWindowed(x, y, logM)
993  			}
994  			return z.expNNMontgomeryEven(x, y, m)
995  		}
996  	}
997
998  	z = z.set(x)
999  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
1000  	shift := nlz(v) + 1
1001  	v <<= shift
1002  	var q nat
1003
1004  	const mask = 1 << (_W - 1)
1005
1006  	// We walk through the bits of the exponent one by one. Each time we
1007  	// see a bit, we square, thus doubling the power. If the bit is a one,
1008  	// we also multiply by x, thus adding one to the power.
1009
1010  	w := _W - int(shift)
1011  	// zz and r are used to avoid allocating in mul and div as
1012  	// otherwise the arguments would alias.
1013  	var zz, r nat
1014  	for j := 0; j < w; j++ {
1015  		zz = zz.sqr(z)
1016  		zz, z = z, zz
1017
1018  		if v&mask != 0 {
1019  			zz = zz.mul(z, x)
1020  			zz, z = z, zz
1021  		}
1022
1023  		if len(m) != 0 {
1024  			zz, r = zz.div(r, z, m)
1025  			zz, r, q, z = q, z, zz, r
1026  		}
1027
1028  		v <<= 1
1029  	}
1030
1031  	for i := len(y) - 2; i >= 0; i-- {
1032  		v = y[i]
1033
1034  		for j := 0; j < _W; j++ {
1035  			zz = zz.sqr(z)
1036  			zz, z = z, zz
1037
1038  			if v&mask != 0 {
1039  				zz = zz.mul(z, x)
1040  				zz, z = z, zz
1041  			}
1042
1043  			if len(m) != 0 {
1044  				zz, r = zz.div(r, z, m)
1045  				zz, r, q, z = q, z, zz, r
1046  			}
1047
1048  			v <<= 1
1049  		}
1050  	}
1051
1052  	return z.norm()
1053  }
1054
1055  // expNNMontgomeryEven calculates x**y mod m where m = m1 × m2 for m1 = 2ⁿ and m2 odd.
1056  // It uses two recursive calls to expNN for x**y mod m1 and x**y mod m2
1057  // and then uses the Chinese Remainder Theorem to combine the results.
1058  // The recursive call using m1 will use expNNWindowed,
1059  // while the recursive call using m2 will use expNNMontgomery.
1060  // For more details, see Ç. K. Koç, “Montgomery Reduction with Even Modulus”,
1061  // IEE Proceedings: Computers and Digital Techniques, 141(5) 314-316, September 1994.
1062  // http://www.people.vcu.edu/~jwang3/CMSC691/j34monex.pdf
1063  func (z nat) expNNMontgomeryEven(x, y, m nat) nat {
1064  	// Split m = m₁ × m₂ where m₁ = 2ⁿ
1065  	n := m.trailingZeroBits()
1066  	m1 := nat(nil).shl(natOne, n)
1067  	m2 := nat(nil).shr(m, n)
1068
1069  	// We want z = x**y mod m.
1070  	// z₁ = x**y mod m1 = (x**y mod m) mod m1 = z mod m1
1071  	// z₂ = x**y mod m2 = (x**y mod m) mod m2 = z mod m2
1072  	// (We are using the math/big convention for names here,
1073  	// where the computation is z = x**y mod m, so its parts are z1 and z2.
1074  	// The paper is computing x = a**e mod n; it refers to these as x2 and z1.)
1075  	z1 := nat(nil).expNN(x, y, m1, false)
1076  	z2 := nat(nil).expNN(x, y, m2, false)
1077
1078  	// Reconstruct z from z₁, z₂ using CRT, using algorithm from paper,
1079  	// which uses only a single modInverse (and an easy one at that).
1080  	//	p = (z₁ - z₂) × m₂⁻¹ (mod m₁)
1081  	//	z = z₂ + p × m₂
1082  	// The final addition is in range because:
1083  	//	z = z₂ + p × m₂
1084  	//	  ≤ z₂ + (m₁-1) × m₂
1085  	//	  < m₂ + (m₁-1) × m₂
1086  	//	  = m₁ × m₂
1087  	//	  = m.
1088  	z = z.set(z2)
1089
1090  	// Compute (z₁ - z₂) mod m1 [m1 == 2**n] into z1.
1091  	z1 = z1.subMod2N(z1, z2, n)
1092
1093  	// Reuse z2 for p = (z₁ - z₂) [in z1] * m2⁻¹ (mod m₁ [= 2ⁿ]).
1094  	m2inv := nat(nil).modInverse(m2, m1)
1095  	z2 = z2.mul(z1, m2inv)
1096  	z2 = z2.trunc(z2, n)
1097
1098  	// Reuse z1 for p * m2.
1099  	z = z.add(z, z1.mul(z2, m2))
1100
1101  	return z
1102  }
1103
1104  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window,
1105  // where m = 2**logM.
1106  func (z nat) expNNWindowed(x, y nat, logM uint) nat {
1107  	if len(y) <= 1 {
1108  		panic("big: misuse of expNNWindowed")
1109  	}
1110  	if x[0]&1 == 0 {
1111  		// len(y) > 1, so y  > logM.
1112  		// x is even, so x**y is a multiple of 2**y which is a multiple of 2**logM.
1113  		return z.setWord(0)
1114  	}
1115  	if logM == 1 {
1116  		return z.setWord(1)
1117  	}
1118
1119  	// zz is used to avoid allocating in mul as otherwise
1120  	// the arguments would alias.
1121  	w := int((logM + _W - 1) / _W)
1122  	zzp := getNat(w)
1123  	zz := *zzp
1124
1125  	const n = 4
1126  	// powers[i] contains x^i.
1127  	var powers [1 << n]*nat
1128  	for i := range powers {
1129  		powers[i] = getNat(w)
1130  	}
1131  	*powers[0] = powers[0].set(natOne)
1132  	*powers[1] = powers[1].trunc(x, logM)
1133  	for i := 2; i < 1<<n; i += 2 {
1134  		p2, p, p1 := powers[i/2], powers[i], powers[i+1]
1135  		*p = p.sqr(*p2)
1136  		*p = p.trunc(*p, logM)
1137  		*p1 = p1.mul(*p, x)
1138  		*p1 = p1.trunc(*p1, logM)
1139  	}
1140
1141  	// Because phi(2**logM) = 2**(logM-1), x**(2**(logM-1)) = 1,
1142  	// so we can compute x**(y mod 2**(logM-1)) instead of x**y.
1143  	// That is, we can throw away all but the bottom logM-1 bits of y.
1144  	// Instead of allocating a new y, we start reading y at the right word
1145  	// and truncate it appropriately at the start of the loop.
1146  	i := len(y) - 1
1147  	mtop := int((logM - 2) / _W) // -2 because the top word of N bits is the (N-1)/W'th word.
1149  	if mbits := (logM - 1) & (_W - 1); mbits != 0 {
1150  		mmask = (1 << mbits) - 1
1151  	}
1152  	if i > mtop {
1153  		i = mtop
1154  	}
1156  	z = z.setWord(1)
1157  	for ; i >= 0; i-- {
1158  		yi := y[i]
1159  		if i == mtop {
1161  		}
1162  		for j := 0; j < _W; j += n {
1164  				// Account for use of 4 bits in previous iteration.
1165  				// Unrolled loop for significant performance
1166  				// gain. Use go test -bench=".*" in crypto/rsa
1167  				// to check performance before making changes.
1168  				zz = zz.sqr(z)
1169  				zz, z = z, zz
1170  				z = z.trunc(z, logM)
1171
1172  				zz = zz.sqr(z)
1173  				zz, z = z, zz
1174  				z = z.trunc(z, logM)
1175
1176  				zz = zz.sqr(z)
1177  				zz, z = z, zz
1178  				z = z.trunc(z, logM)
1179
1180  				zz = zz.sqr(z)
1181  				zz, z = z, zz
1182  				z = z.trunc(z, logM)
1183  			}
1184
1185  			zz = zz.mul(z, *powers[yi>>(_W-n)])
1186  			zz, z = z, zz
1187  			z = z.trunc(z, logM)
1188
1189  			yi <<= n
1191  		}
1192  	}
1193
1194  	*zzp = zz
1195  	putNat(zzp)
1196  	for i := range powers {
1197  		putNat(powers[i])
1198  	}
1199
1200  	return z.norm()
1201  }
1202
1203  // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
1204  // Uses Montgomery representation.
1205  func (z nat) expNNMontgomery(x, y, m nat) nat {
1206  	numWords := len(m)
1207
1208  	// We want the lengths of x and m to be equal.
1209  	// It is OK if x >= m as long as len(x) == len(m).
1210  	if len(x) > numWords {
1211  		_, x = nat(nil).div(nil, x, m)
1212  		// Note: now len(x) <= numWords, not guaranteed ==.
1213  	}
1214  	if len(x) < numWords {
1215  		rr := make(nat, numWords)
1216  		copy(rr, x)
1217  		x = rr
1218  	}
1219
1220  	// Ideally the precomputations would be performed outside, and reused
1221  	// k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
1222  	// Iteration for Multiplicative Inverses Modulo Prime Powers".
1223  	k0 := 2 - m[0]
1224  	t := m[0] - 1
1225  	for i := 1; i < _W; i <<= 1 {
1226  		t *= t
1227  		k0 *= (t + 1)
1228  	}
1229  	k0 = -k0
1230
1231  	// RR = 2**(2*_W*len(m)) mod m
1232  	RR := nat(nil).setWord(1)
1233  	zz := nat(nil).shl(RR, uint(2*numWords*_W))
1234  	_, RR = nat(nil).div(RR, zz, m)
1235  	if len(RR) < numWords {
1236  		zz = zz.make(numWords)
1237  		copy(zz, RR)
1238  		RR = zz
1239  	}
1240  	// one = 1, with equal length to that of m
1241  	one := make(nat, numWords)
1242  	one[0] = 1
1243
1244  	const n = 4
1245  	// powers[i] contains x^i
1246  	var powers [1 << n]nat
1247  	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
1248  	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
1249  	for i := 2; i < 1<<n; i++ {
1250  		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
1251  	}
1252
1253  	// initialize z = 1 (Montgomery 1)
1254  	z = z.make(numWords)
1255  	copy(z, powers[0])
1256
1257  	zz = zz.make(numWords)
1258
1259  	// same windowed exponent, but with Montgomery multiplications
1260  	for i := len(y) - 1; i >= 0; i-- {
1261  		yi := y[i]
1262  		for j := 0; j < _W; j += n {
1263  			if i != len(y)-1 || j != 0 {
1264  				zz = zz.montgomery(z, z, m, k0, numWords)
1265  				z = z.montgomery(zz, zz, m, k0, numWords)
1266  				zz = zz.montgomery(z, z, m, k0, numWords)
1267  				z = z.montgomery(zz, zz, m, k0, numWords)
1268  			}
1269  			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
1270  			z, zz = zz, z
1271  			yi <<= n
1272  		}
1273  	}
1274  	// convert to regular number
1275  	zz = zz.montgomery(z, one, m, k0, numWords)
1276
1277  	// One last reduction, just in case.
1278  	// See golang.org/issue/13907.
1279  	if zz.cmp(m) >= 0 {
1280  		// Common case is m has high bit set; in that case,
1281  		// since zz is the same length as m, there can be just
1282  		// one multiple of m to remove. Just subtract.
1283  		// We think that the subtract should be sufficient in general,
1284  		// so do that unconditionally, but double-check,
1285  		// in case our beliefs are wrong.
1286  		// The div is not expected to be reached.
1287  		zz = zz.sub(zz, m)
1288  		if zz.cmp(m) >= 0 {
1289  			_, zz = nat(nil).div(nil, zz, m)
1290  		}
1291  	}
1292
1293  	return zz.norm()
1294  }
1295
1296  // bytes writes the value of z into buf using big-endian encoding.
1297  // The value of z is encoded in the slice buf[i:]. If the value of z
1298  // cannot be represented in buf, bytes panics. The number i of unused
1299  // bytes at the beginning of buf is returned as result.
1300  func (z nat) bytes(buf []byte) (i int) {
1301  	// This function is used in cryptographic operations. It must not leak
1302  	// anything but the Int's sign and bit size through side-channels. Any
1303  	// changes must be reviewed by a security expert.
1304  	i = len(buf)
1305  	for _, d := range z {
1306  		for j := 0; j < _S; j++ {
1307  			i--
1308  			if i >= 0 {
1309  				buf[i] = byte(d)
1310  			} else if byte(d) != 0 {
1311  				panic("math/big: buffer too small to fit value")
1312  			}
1313  			d >>= 8
1314  		}
1315  	}
1316
1317  	if i < 0 {
1318  		i = 0
1319  	}
1320  	for i < len(buf) && buf[i] == 0 {
1321  		i++
1322  	}
1323
1324  	return
1325  }
1326
1327  // bigEndianWord returns the contents of buf interpreted as a big-endian encoded Word value.
1328  func bigEndianWord(buf []byte) Word {
1329  	if _W == 64 {
1330  		return Word(binary.BigEndian.Uint64(buf))
1331  	}
1332  	return Word(binary.BigEndian.Uint32(buf))
1333  }
1334
1335  // setBytes interprets buf as the bytes of a big-endian unsigned
1336  // integer, sets z to that value, and returns z.
1337  func (z nat) setBytes(buf []byte) nat {
1338  	z = z.make((len(buf) + _S - 1) / _S)
1339
1340  	i := len(buf)
1341  	for k := 0; i >= _S; k++ {
1342  		z[k] = bigEndianWord(buf[i-_S : i])
1343  		i -= _S
1344  	}
1345  	if i > 0 {
1346  		var d Word
1347  		for s := uint(0); i > 0; s += 8 {
1348  			d |= Word(buf[i-1]) << s
1349  			i--
1350  		}
1351  		z[len(z)-1] = d
1352  	}
1353
1354  	return z.norm()
1355  }
1356
1357  // sqrt sets z = ⌊√x⌋
1358  func (z nat) sqrt(x nat) nat {
1359  	if x.cmp(natOne) <= 0 {
1360  		return z.set(x)
1361  	}
1362  	if alias(z, x) {
1363  		z = nil
1364  	}
1365
1366  	// Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller.
1367  	// See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt).
1368  	// https://members.loria.fr/PZimmermann/mca/pub226.html
1369  	// If x is one less than a perfect square, the sequence oscillates between the correct z and z+1;
1370  	// otherwise it converges to the correct z and stays there.
1371  	var z1, z2 nat
1372  	z1 = z
1373  	z1 = z1.setUint64(1)
1374  	z1 = z1.shl(z1, uint(x.bitLen()+1)/2) // must be ≥ √x
1375  	for n := 0; ; n++ {
1376  		z2, _ = z2.div(nil, x, z1)
1378  		z2 = z2.shr(z2, 1)
1379  		if z2.cmp(z1) >= 0 {
1381  			// Figure out whether z1 or z2 is currently aliased to z by looking at loop count.
1382  			if n&1 == 0 {
1383  				return z1
1384  			}
1385  			return z.set(z1)
1386  		}
1387  		z1, z2 = z2, z1
1388  	}
1389  }
1390
1391  // subMod2N returns z = (x - y) mod 2ⁿ.
1392  func (z nat) subMod2N(x, y nat, n uint) nat {
1393  	if uint(x.bitLen()) > n {
1394  		if alias(z, x) {
1395  			// ok to overwrite x in place
1396  			x = x.trunc(x, n)
1397  		} else {
1398  			x = nat(nil).trunc(x, n)
1399  		}
1400  	}
1401  	if uint(y.bitLen()) > n {
1402  		if alias(z, y) {
1403  			// ok to overwrite y in place
1404  			y = y.trunc(y, n)
1405  		} else {
1406  			y = nat(nil).trunc(y, n)
1407  		}
1408  	}
1409  	if x.cmp(y) >= 0 {
1410  		return z.sub(x, y)
1411  	}
1412  	// x - y < 0; x - y mod 2ⁿ = x - y + 2ⁿ = 2ⁿ - (y - x) = 1 + 2ⁿ-1 - (y - x) = 1 + ^(y - x).
1413  	z = z.sub(y, x)
1414  	for uint(len(z))*_W < n {
1415  		z = append(z, 0)
1416  	}
1417  	for i := range z {
1418  		z[i] = ^z[i]
1419  	}
1420  	z = z.trunc(z, n)