Source file src/simd/archsimd/_gen/simdgen/gen_simdTypes.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"cmp"
    10  	"fmt"
    11  	"maps"
    12  	"slices"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  )
    17  
// simdType describes one generated SIMD type (a vector or a mask). Values of
// this type are the data passed to the type, load/store and mask templates in
// this file.
type simdType struct {
	Name                    string // The Go type name of this simd type, for example Int32x4.
	Lanes                   int    // The number of elements in this vector/mask.
	Base                    string // The element type; for Int32x4 it is int32.
	Fields                  string // The struct fields, already formatted; emitted verbatim as the body of the generated struct.
	Type                    string // Either "mask" or "vreg".
	VectorCounterpart       string // For mask use only: [simdType.Name] with "Mask" replaced by "Int".
	ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32.
	Size                    int    // The size of the vector type, in bits.
}
    28  
    29  func (x simdType) ElemBits() int {
    30  	return x.Size / x.Lanes
    31  }
    32  
    33  func (x simdType) Article() string {
    34  	if strings.HasPrefix(x.Name, "Int") {
    35  		return "an"
    36  	}
    37  	return "a" // Float, Uint
    38  }
    39  
    40  // LanesContainer returns the smallest int/uint bit size that is
    41  // large enough to hold one bit for each lane.  E.g., Mask32x4
    42  // is 4 lanes, and a uint8 is the smallest uint that has 4 bits.
    43  func (x simdType) LanesContainer() int {
    44  	if x.Lanes > 64 {
    45  		panic("too many lanes")
    46  	}
    47  	if x.Lanes > 32 {
    48  		return 64
    49  	}
    50  	if x.Lanes > 16 {
    51  		return 32
    52  	}
    53  	if x.Lanes > 8 {
    54  		return 16
    55  	}
    56  	return 8
    57  }
    58  
    59  // MaskedLoadStoreFilter encodes which simd type type currently
    60  // get masked loads/stores generated, it is used in two places,
    61  // this forces coordination.
    62  func (x simdType) MaskedLoadStoreFilter() bool {
    63  	return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask"
    64  }
    65  
    66  func (x simdType) IntelSizeSuffix() string {
    67  	switch x.ElemBits() {
    68  	case 8:
    69  		return "B"
    70  	case 16:
    71  		return "W"
    72  	case 32:
    73  		return "D"
    74  	case 64:
    75  		return "Q"
    76  	}
    77  	panic("oops")
    78  }
    79  
    80  func (x simdType) MaskedLoadDoc() string {
    81  	if x.Size == 512 || x.ElemBits() < 32 {
    82  		return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits())
    83  	} else {
    84  		return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
    85  	}
    86  }
    87  
    88  func (x simdType) MaskedStoreDoc() string {
    89  	if x.Size == 512 || x.ElemBits() < 32 {
    90  		return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits())
    91  	} else {
    92  		return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
    93  	}
    94  }
    95  
    96  func (x simdType) ToBitsDoc() string {
    97  	if x.Size == 512 || x.ElemBits() == 16 {
    98  		return fmt.Sprintf("// Asm: KMOV%s, CPU Features: AVX512", x.IntelSizeSuffix())
    99  	}
   100  	// 128/256 bit vectors with 8, 32, 64 bit elements
   101  	var asm string
   102  	var feat string
   103  	switch x.ElemBits() {
   104  	case 8:
   105  		asm = "VPMOVMSKB"
   106  		if x.Size == 256 {
   107  			feat = "AVX2"
   108  		} else {
   109  			feat = "AVX"
   110  		}
   111  	case 32:
   112  		asm = "VMOVMSKPS"
   113  		feat = "AVX"
   114  	case 64:
   115  		asm = "VMOVMSKPD"
   116  		feat = "AVX"
   117  	default:
   118  		panic("unexpected ElemBits")
   119  	}
   120  	return fmt.Sprintf("// Asm: %s, CPU Features: %s", asm, feat)
   121  }
   122  
   123  func compareSimdTypes(x, y simdType) int {
   124  	// "vreg" then "mask"
   125  	if c := -compareNatural(x.Type, y.Type); c != 0 {
   126  		return c
   127  	}
   128  	// want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64),
   129  	// not "int16" < "int32" < "int64" < "int8")
   130  	// so limit comparison to first 3 bytes in string.
   131  	if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 {
   132  		return c
   133  	}
   134  	// base type size, 8 < 16 < 32 < 64
   135  	if c := x.ElemBits() - y.ElemBits(); c != 0 {
   136  		return c
   137  	}
   138  	// vector size last
   139  	return x.Size - y.Size
   140  }
   141  
// simdTypeMap groups simd types by vector size: the key is the size in bits
// and the value is the list of types of that size (see parseSIMDTypes).
type simdTypeMap map[int][]simdType
   143  
// simdTypePair is a (source, destination) pair of simd types. It is the data
// passed to the "vectorConversion" template, which declares the
// Tsrc.As<Tdst>() conversion method.
type simdTypePair struct {
	Tsrc simdType // type being converted from
	Tdst simdType // type being converted to
}
   148  
   149  func compareSimdTypePairs(x, y simdTypePair) int {
   150  	c := compareSimdTypes(x.Tsrc, y.Tsrc)
   151  	if c != 0 {
   152  		return c
   153  	}
   154  	return compareSimdTypes(x.Tdst, y.Tdst)
   155  }
   156  
// simdPackageHeader is written at the top of every generated file in this
// source file: the shared generatedHeader, followed by the goexperiment.simd
// build constraint and the archsimd package clause.
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd

package archsimd
`
   162  
// simdTypesTemplates defines the named templates used by writeSIMDTypes:
//
//   - "sizeTmpl" is executed with a vector size (an int) and declares the
//     v<size> tag type.
//   - "typeTmpl" is executed with a simdType and declares the struct for that
//     vector or mask type, with a doc comment that depends on .Type.
const simdTypesTemplates = `
{{define "sizeTmpl"}}
// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD
type v{{.}} struct {
	_{{.}} [0]func() // uncomparable
}
{{end}}

{{define "typeTmpl"}}
{{- if eq .Type "mask"}}
// {{.Name}} is a mask for a SIMD vector of {{.Lanes}} {{.ElemBits}}-bit elements.
{{- else}}
// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}s.
{{- end}}
type {{.Name}} struct {
{{.Fields}}
}

{{end}}
`
   183  
// simdFeaturesTemplate generates the CPU feature-check methods. It is executed
// by writeSIMDFeatures with a slice of per-feature records and reads the
// fields Feature, GoArch, FeatureVar, Virtual, Implies and ImpliesAll from each
// one. A Virtual feature is emitted as the logical AND of the internal/cpu
// flags listed in Implies; any other feature reads its own internal/cpu flag.
const simdFeaturesTemplate = `
import "internal/cpu"

type X86Features struct {}

var X86 X86Features

{{range .}}
{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
// These five CPU features are bundled together, and no use of AVX-512
// is allowed unless all of these features are supported together.
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
{{- if ne .ImpliesAll ""}}
//
// If it returns true, then the CPU also supports {{.ImpliesAll}}.
{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
func ({{.FeatureVar}}Features) {{.Feature}}() bool {
{{- if .Virtual}}
	return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
{{- else}}
	return cpu.{{.FeatureVar}}.Has{{.Feature}}
{{- end}}
}
{{end}}
`
   219  
// simdLoadStoreTemplate declares Len, Load<Name> and Store for one type. It is
// executed by writeSIMDTypes with a simdType, for non-mask types only.
const simdLoadStoreTemplate = `
// Len returns the number of elements in {{.Article}} {{.Name}}.
func (x {{.Name}}) Len() int { return {{.Lanes}} }

// Load{{.Name}} loads {{.Article}} {{.Name}} from an array.
//
//go:noescape
func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}}

// Store stores {{.Article}} {{.Name}} to an array.
//
//go:noescape
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
   234  
   235  const simdMaskFromValTemplate = `
   236  // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
   237  {{- if ne .Lanes .LanesContainer}}
   238  // Only the lower {{.Lanes}} bits of y are used.
   239  {{- end}}
   240  //
   241  // Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512
   242  func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}
   243  
   244  // ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset.
   245  {{- if ne .Lanes .LanesContainer}}
   246  // Only the lower {{.Lanes}} bits of y are used.
   247  {{- end}}
   248  //
   249  {{.ToBitsDoc}}
   250  func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
   251  `
   252  
// simdMaskedLoadStoreTemplate declares LoadMasked<Name> and StoreMasked for
// one type. It is executed by writeSIMDTypes with a simdType, for non-mask
// types that pass MaskedLoadStoreFilter. The mask parameter type is derived
// from the element width and lane count (Mask<ElemBits>x<Lanes>).
const simdMaskedLoadStoreTemplate = `
// LoadMasked{{.Name}} loads {{.Article}} {{.Name}} from an array,
// at those elements enabled by mask.
//
{{.MaskedLoadDoc}}
//
//go:noescape
func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}}

// StoreMasked stores {{.Article}} {{.Name}} to an array,
// at those elements enabled by mask.
//
{{.MaskedStoreDoc}}
//
//go:noescape
func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}})
`
   270  
// simdStubsTmpl defines the named templates used by writeSIMDStubs to declare
// the intrinsic stubs.
//
// The "op..." templates are executed with an operation; which one is used is
// the name returned by classifyOp, with "VecAsScalar" appended for op2/op3
// when checkVecAsScalar finds a scalar-like operand. The templates differ in
// how many operands they take, which operand becomes the receiver, and where
// the immediate (if any) is placed in the parameter list.
//
// "vectorConversion" is executed with a simdTypePair and "mask" with a mask
// simdType.
const simdStubsTmpl = `
{{define "op1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}}
{{end}}

{{define "op2"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_31Zero3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op2VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}}
{{end}}

{{define "op3VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}}
{{end}}

{{define "op4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_31"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op1Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8_II"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
// {{.ImmName}} should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_SHA1RNDS4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}


{{define "op4Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "vectorConversion"}}
// As{{.Tdst.Name}} returns {{.Tdst.Article}} {{.Tdst.Name}} with the same bit representation as x.
func (x {{.Tsrc.Name}}) As{{.Tdst.Name}}() {{.Tdst.Name}}
{{end}}

{{define "mask"}}
// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}.
func (from {{.Name}}) To{{.VectorCounterpart}}() (to {{.VectorCounterpart}})

// asMask converts from {{.VectorCounterpart}} to {{.Name}}.
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}})

func (x {{.Name}}) And(y {{.Name}}) {{.Name}}

func (x {{.Name}}) Or(y {{.Name}}) {{.Name}}
{{end}}
`
   461  
   462  // parseSIMDTypes groups go simd types by their vector sizes, and
   463  // returns a map whose key is the vector size, value is the simd type.
   464  func parseSIMDTypes(ops []Operation) simdTypeMap {
   465  	// TODO: maybe instead of going over ops, let's try go over types.yaml.
   466  	ret := map[int][]simdType{}
   467  	seen := map[string]struct{}{}
   468  	processArg := func(arg Operand) {
   469  		if arg.Class == "immediate" || arg.Class == "greg" {
   470  			// Immediates are not encoded as vector types.
   471  			return
   472  		}
   473  		if _, ok := seen[*arg.Go]; ok {
   474  			return
   475  		}
   476  		seen[*arg.Go] = struct{}{}
   477  
   478  		lanes := *arg.Lanes
   479  		base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits)
   480  		tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes)
   481  		tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits)
   482  		valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base)
   483  		fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS)
   484  		if arg.Class == "mask" {
   485  			vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int")
   486  			reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32)
   487  			ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits})
   488  			// In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well.
   489  			if _, ok := seen[vectorCounterpart]; !ok {
   490  				seen[vectorCounterpart] = struct{}{}
   491  				ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits})
   492  			}
   493  		} else {
   494  			ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits})
   495  		}
   496  	}
   497  	for _, op := range ops {
   498  		for _, arg := range op.In {
   499  			processArg(arg)
   500  		}
   501  		for _, arg := range op.Out {
   502  			processArg(arg)
   503  		}
   504  	}
   505  	return ret
   506  }
   507  
   508  func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair {
   509  	v := []simdTypePair{}
   510  	for _, ts := range typeMap {
   511  		for i, tsrc := range ts {
   512  			for j, tdst := range ts {
   513  				if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" &&
   514  					tsrc.Lanes > 1 && tdst.Lanes > 1 {
   515  					v = append(v, simdTypePair{tsrc, tdst})
   516  				}
   517  			}
   518  		}
   519  	}
   520  	slices.SortFunc(v, compareSimdTypePairs)
   521  	return v
   522  }
   523  
   524  func masksFromTypeMap(typeMap simdTypeMap) []simdType {
   525  	m := []simdType{}
   526  	for _, ts := range typeMap {
   527  		for _, tsrc := range ts {
   528  			if tsrc.Type == "mask" {
   529  				m = append(m, tsrc)
   530  			}
   531  		}
   532  	}
   533  	slices.SortFunc(m, compareSimdTypes)
   534  	return m
   535  }
   536  
   537  func typesFromTypeMap(typeMap simdTypeMap) []simdType {
   538  	m := []simdType{}
   539  	for _, ts := range typeMap {
   540  		for _, tsrc := range ts {
   541  			if tsrc.Lanes > 1 {
   542  				m = append(m, tsrc)
   543  			}
   544  		}
   545  	}
   546  	slices.SortFunc(m, compareSimdTypes)
   547  	return m
   548  }
   549  
   550  // writeSIMDTypes generates the simd vector types into a bytes.Buffer
   551  func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
   552  	t := templateOf(simdTypesTemplates, "types_amd64")
   553  	loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
   554  	maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
   555  	maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
   556  
   557  	buffer := new(bytes.Buffer)
   558  	buffer.WriteString(simdPackageHeader)
   559  
   560  	sizes := make([]int, 0, len(typeMap))
   561  	for size, types := range typeMap {
   562  		slices.SortFunc(types, compareSimdTypes)
   563  		sizes = append(sizes, size)
   564  	}
   565  	sort.Ints(sizes)
   566  
   567  	for _, size := range sizes {
   568  		if size <= 64 {
   569  			// these are scalar
   570  			continue
   571  		}
   572  		if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil {
   573  			panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err))
   574  		}
   575  		for _, typeDef := range typeMap[size] {
   576  			if typeDef.Lanes == 1 {
   577  				continue
   578  			}
   579  			if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil {
   580  				panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err))
   581  			}
   582  			if typeDef.Type != "mask" {
   583  				if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil {
   584  					panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err))
   585  				}
   586  				// restrict to AVX2 masked loads/stores first.
   587  				if typeDef.MaskedLoadStoreFilter() {
   588  					if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil {
   589  						panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err))
   590  					}
   591  				}
   592  			} else {
   593  				if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
   594  					panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
   595  				}
   596  			}
   597  		}
   598  	}
   599  
   600  	return buffer
   601  }
   602  
// goarchFeatures holds the CPU feature information registered for one GOARCH
// (see registerFeatureInfo).
type goarchFeatures struct {
	// featureVar is the name of the exported feature-check variable for this
	// architecture. The features template uses it both as the prefix of the
	// generated receiver type (<featureVar>Features) and to select the
	// internal/cpu variable (cpu.<featureVar>).
	featureVar string

	// features records per-feature information, keyed by feature name.
	features map[string]featureInfo
}
   611  
// featureInfo is the additional information recorded about one CPU feature.
type featureInfo struct {
	// Implies is a list of other CPU features that are required for this
	// feature. These are allowed to chain.
	//
	// For example, if the Frob feature lists "Baz", then if X.Frob() returns
	// true, it must also be true that the CPU has feature Baz.
	Implies []string

	// Virtual means this feature is not represented directly in internal/cpu,
	// but is instead the logical AND of the features in Implies.
	Virtual bool
}
   624  
// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
// about that feature. Not all features need to be in this map: looking up a
// missing GOARCH or feature yields the zero value, i.e. a non-virtual feature
// that implies nothing. Entries are added with registerFeatureInfo.
var goarchFeatureInfo = make(map[string]goarchFeatures)
   628  
// registerFeatureInfo records features as the CPU feature information for
// goArch, replacing anything previously registered for the same GOARCH.
func registerFeatureInfo(goArch string, features goarchFeatures) {
	goarchFeatureInfo[goArch] = features
}
   632  
   633  func featureImplies(goarch string, base string) string {
   634  	// Compute the transitive closure of base.
   635  	var list []string
   636  	var visit func(f string)
   637  	visit = func(f string) {
   638  		list = append(list, f)
   639  		for _, dep := range goarchFeatureInfo[goarch].features[f].Implies {
   640  			visit(dep)
   641  		}
   642  	}
   643  	visit(base)
   644  	// Drop base
   645  	list = list[1:]
   646  	// Put in "nice" order
   647  	slices.Reverse(list)
   648  	// Combine into a comment-ready form
   649  	switch len(list) {
   650  	case 0:
   651  		return ""
   652  	case 1:
   653  		return list[0]
   654  	case 2:
   655  		return list[0] + " and " + list[1]
   656  	default:
   657  		list[len(list)-1] = "and " + list[len(list)-1]
   658  		return strings.Join(list, ", ")
   659  	}
   660  }
   661  
   662  func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
   663  	// Gather all features
   664  	type featureKey struct {
   665  		GoArch  string
   666  		Feature string
   667  	}
   668  	featureSet := make(map[featureKey]struct{})
   669  	for _, op := range ops {
   670  		// Generate a feature check for each independant feature in a
   671  		// composite feature.
   672  		for feature := range strings.SplitSeq(op.CPUFeature, ",") {
   673  			feature = strings.TrimSpace(feature)
   674  			featureSet[featureKey{op.GoArch, feature}] = struct{}{}
   675  		}
   676  	}
   677  	featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
   678  		if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
   679  			return c
   680  		}
   681  		return compareNatural(a.Feature, b.Feature)
   682  	})
   683  
   684  	// TODO: internal/cpu doesn't enforce these at all. You can even do
   685  	// GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
   686  	// AVX2. We need to push these dependencies into it somehow.
   687  	type feature struct {
   688  		featureKey
   689  		FeatureVar string
   690  		Virtual    bool
   691  		Implies    []string
   692  		ImpliesAll string
   693  	}
   694  	var features []feature
   695  	for _, k := range featureKeys {
   696  		featureVar := goarchFeatureInfo[k.GoArch].featureVar
   697  		fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
   698  		features = append(features, feature{
   699  			featureKey: k,
   700  			FeatureVar: featureVar,
   701  			Virtual:    fi.Virtual,
   702  			Implies:    fi.Implies,
   703  			ImpliesAll: featureImplies(k.GoArch, k.Feature),
   704  		})
   705  	}
   706  
   707  	// If we ever have the same feature name on more than one GOARCH, we'll have
   708  	// to be more careful about this.
   709  	t := templateOf(simdFeaturesTemplate, "features")
   710  
   711  	buffer := new(bytes.Buffer)
   712  	buffer.WriteString(simdPackageHeader)
   713  
   714  	if err := t.Execute(buffer, features); err != nil {
   715  		panic(fmt.Errorf("failed to execute features template: %w", err))
   716  	}
   717  
   718  	return buffer
   719  }
   720  
   721  // writeSIMDStubs returns two bytes.Buffers containing the declarations for the public
   722  // and internal-use vector intrinsics.
   723  func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
   724  	t := templateOf(simdStubsTmpl, "simdStubs")
   725  	f = new(bytes.Buffer)
   726  	fI = new(bytes.Buffer)
   727  	f.WriteString(simdPackageHeader)
   728  	fI.WriteString(simdPackageHeader)
   729  
   730  	slices.SortFunc(ops, compareOperations)
   731  
   732  	for i, op := range ops {
   733  		if op.NoTypes != nil && *op.NoTypes == "true" {
   734  			continue
   735  		}
   736  		if op.SkipMaskedMethod() {
   737  			continue
   738  		}
   739  		idxVecAsScalar, err := checkVecAsScalar(op)
   740  		if err != nil {
   741  			panic(err)
   742  		}
   743  		if s, op, err := classifyOp(op); err == nil {
   744  			if idxVecAsScalar != -1 {
   745  				if s == "op2" || s == "op3" {
   746  					s += "VecAsScalar"
   747  				} else {
   748  					panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize"))
   749  				}
   750  			}
   751  			if i == 0 || op.Go != ops[i-1].Go {
   752  				if unicode.IsUpper([]rune(op.Go)[0]) {
   753  					fmt.Fprintf(f, "\n/* %s */\n", op.Go)
   754  				} else {
   755  					fmt.Fprintf(fI, "\n/* %s */\n", op.Go)
   756  				}
   757  			}
   758  			if unicode.IsUpper([]rune(op.Go)[0]) {
   759  				if err := t.ExecuteTemplate(f, s, op); err != nil {
   760  					panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
   761  				}
   762  			} else {
   763  				if err := t.ExecuteTemplate(fI, s, op); err != nil {
   764  					panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
   765  				}
   766  			}
   767  		} else {
   768  			panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err))
   769  		}
   770  	}
   771  
   772  	vectorConversions := vConvertFromTypeMap(typeMap)
   773  	for _, conv := range vectorConversions {
   774  		if err := t.ExecuteTemplate(f, "vectorConversion", conv); err != nil {
   775  			panic(fmt.Errorf("failed to execute vectorConversion template: %w", err))
   776  		}
   777  	}
   778  
   779  	masks := masksFromTypeMap(typeMap)
   780  	for _, mask := range masks {
   781  		if err := t.ExecuteTemplate(f, "mask", mask); err != nil {
   782  			panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err))
   783  		}
   784  	}
   785  
   786  	return
   787  }
   788  

View as plain text