Source file src/crypto/internal/bigmod/_asm/nat_amd64_asm.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"strconv"
     9  
    10  	. "github.com/mmcloughlin/avo/build"
    11  	. "github.com/mmcloughlin/avo/operand"
    12  	. "github.com/mmcloughlin/avo/reg"
    13  )
    14  
    15  //go:generate go run . -out ../nat_amd64.s -pkg bigmod
    16  
    17  func main() {
    18  	Package("crypto/internal/bigmod")
    19  	ConstraintExpr("!purego")
    20  
    21  	addMulVVW(1024)
    22  	addMulVVW(1536)
    23  	addMulVVW(2048)
    24  
    25  	Generate()
    26  }
    27  
    28  func addMulVVW(bits int) {
    29  	if bits%64 != 0 {
    30  		panic("bit size unsupported")
    31  	}
    32  
    33  	Implement("addMulVVW" + strconv.Itoa(bits))
    34  
    35  	CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1))
    36  	JEQ(LabelRef("adx"))
    37  
    38  	z := Mem{Base: Load(Param("z"), GP64())}
    39  	x := Mem{Base: Load(Param("x"), GP64())}
    40  	y := Load(Param("y"), GP64())
    41  
    42  	carry := GP64()
    43  	XORQ(carry, carry) // zero out carry
    44  
    45  	for i := 0; i < bits/64; i++ {
    46  		Comment("Iteration " + strconv.Itoa(i))
    47  		hi, lo := RDX, RAX // implicit MULQ inputs and outputs
    48  		MOVQ(x.Offset(i*8), lo)
    49  		MULQ(y)
    50  		ADDQ(z.Offset(i*8), lo)
    51  		ADCQ(Imm(0), hi)
    52  		ADDQ(carry, lo)
    53  		ADCQ(Imm(0), hi)
    54  		MOVQ(hi, carry)
    55  		MOVQ(lo, z.Offset(i*8))
    56  	}
    57  
    58  	Store(carry, ReturnIndex(0))
    59  	RET()
    60  
    61  	Label("adx")
    62  
    63  	// The ADX strategy implements the following function, where c1 and c2 are
    64  	// the overflow and the carry flag respectively.
    65  	//
    66  	//    func addMulVVW(z, x []uint, y uint) (carry uint) {
    67  	//        var c1, c2 uint
    68  	//        for i := range z {
    69  	//            hi, lo := bits.Mul(x[i], y)
    70  	//            lo, c1 = bits.Add(lo, z[i], c1)
    71  	//            z[i], c2 = bits.Add(lo, carry, c2)
    72  	//            carry = hi
    73  	//        }
    74  	//        return carry + c1 + c2
    75  	//    }
    76  	//
    77  	// The loop is fully unrolled and the hi / carry registers are alternated
    78  	// instead of introducing a MOV.
    79  
    80  	z = Mem{Base: Load(Param("z"), GP64())}
    81  	x = Mem{Base: Load(Param("x"), GP64())}
    82  	Load(Param("y"), RDX) // implicit source of MULXQ
    83  
    84  	carry = GP64()
    85  	XORQ(carry, carry) // zero out carry
    86  	z0 := GP64()
    87  	XORQ(z0, z0) // unset flags and zero out z0
    88  
    89  	for i := 0; i < bits/64; i++ {
    90  		hi, lo := GP64(), GP64()
    91  
    92  		Comment("Iteration " + strconv.Itoa(i))
    93  		MULXQ(x.Offset(i*8), lo, hi)
    94  		ADCXQ(carry, lo)
    95  		ADOXQ(z.Offset(i*8), lo)
    96  		MOVQ(lo, z.Offset(i*8))
    97  
    98  		i++
    99  
   100  		Comment("Iteration " + strconv.Itoa(i))
   101  		MULXQ(x.Offset(i*8), lo, carry)
   102  		ADCXQ(hi, lo)
   103  		ADOXQ(z.Offset(i*8), lo)
   104  		MOVQ(lo, z.Offset(i*8))
   105  	}
   106  
   107  	Comment("Add back carry flags and return")
   108  	ADCXQ(z0, carry)
   109  	ADOXQ(z0, carry)
   110  
   111  	Store(carry, ReturnIndex(0))
   112  	RET()
   113  }
   114  

View as plain text