1
2
3
4
5 package main
6
7 import (
8 "bytes"
9 "cmp"
10 "fmt"
11 "maps"
12 "slices"
13 "sort"
14 "strings"
15 "unicode"
16 )
17
// simdType describes one generated SIMD vector or mask type.
// Values are built by parseSIMDTypes from operation operands.
type simdType struct {
	// Name is the Go name of the type, taken from the operand's Go field.
	Name string
	// Lanes is the number of elements in the type.
	Lanes int
	// Base is the element type name: the operand's base kind followed by
	// its element width in bits.
	Base string
	// Fields is the pre-rendered body of the generated struct: a tag field
	// line and a value-array line, each indented with a tab.
	Fields string
	// Type is the operand class; this file distinguishes "mask" from the rest
	// (vector registers are "vreg").
	Type string
	// VectorCounterpart is set for masks only: Name with "Mask" replaced by "Int".
	VectorCounterpart string
	// ReshapedVectorWithAndOr is set for masks only: "Int32x<Size/32>".
	ReshapedVectorWithAndOr string
	// Size is the total width of the type in bits.
	Size int
}
28
29 func (x simdType) ElemBits() int {
30 return x.Size / x.Lanes
31 }
32
33 func (x simdType) Article() string {
34 if strings.HasPrefix(x.Name, "Int") {
35 return "an"
36 }
37 return "a"
38 }
39
40
41
42
43 func (x simdType) LanesContainer() int {
44 if x.Lanes > 64 {
45 panic("too many lanes")
46 }
47 if x.Lanes > 32 {
48 return 64
49 }
50 if x.Lanes > 16 {
51 return 32
52 }
53 if x.Lanes > 8 {
54 return 16
55 }
56 return 8
57 }
58
59
60
61
62 func (x simdType) MaskedLoadStoreFilter() bool {
63 return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask"
64 }
65
66 func (x simdType) IntelSizeSuffix() string {
67 switch x.ElemBits() {
68 case 8:
69 return "B"
70 case 16:
71 return "W"
72 case 32:
73 return "D"
74 case 64:
75 return "Q"
76 }
77 panic("oops")
78 }
79
80 func (x simdType) MaskedLoadDoc() string {
81 if x.Size == 512 || x.ElemBits() < 32 {
82 return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits())
83 } else {
84 return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
85 }
86 }
87
88 func (x simdType) MaskedStoreDoc() string {
89 if x.Size == 512 || x.ElemBits() < 32 {
90 return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits())
91 } else {
92 return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
93 }
94 }
95
96 func (x simdType) ToBitsDoc() string {
97 if x.Size == 512 || x.ElemBits() == 16 {
98 return fmt.Sprintf("// Asm: KMOV%s, CPU Features: AVX512", x.IntelSizeSuffix())
99 }
100
101 var asm string
102 var feat string
103 switch x.ElemBits() {
104 case 8:
105 asm = "VPMOVMSKB"
106 if x.Size == 256 {
107 feat = "AVX2"
108 } else {
109 feat = "AVX"
110 }
111 case 32:
112 asm = "VMOVMSKPS"
113 feat = "AVX"
114 case 64:
115 asm = "VMOVMSKPD"
116 feat = "AVX"
117 default:
118 panic("unexpected ElemBits")
119 }
120 return fmt.Sprintf("// Asm: %s, CPU Features: %s", asm, feat)
121 }
122
123 func compareSimdTypes(x, y simdType) int {
124
125 if c := -compareNatural(x.Type, y.Type); c != 0 {
126 return c
127 }
128
129
130
131 if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 {
132 return c
133 }
134
135 if c := x.ElemBits() - y.ElemBits(); c != 0 {
136 return c
137 }
138
139 return x.Size - y.Size
140 }
141
// simdTypeMap groups simdTypes by their total size in bits.
type simdTypeMap map[int][]simdType
143
// simdTypePair is an ordered (source, destination) pair of simdTypes,
// used to describe a conversion from Tsrc to Tdst.
type simdTypePair struct {
	Tsrc simdType // type being converted from
	Tdst simdType // type being converted to
}
148
149 func compareSimdTypePairs(x, y simdTypePair) int {
150 c := compareSimdTypes(x.Tsrc, y.Tsrc)
151 if c != 0 {
152 return c
153 }
154 return compareSimdTypes(x.Tdst, y.Tdst)
155 }
156
// simdPackageHeader is written at the top of every generated file in this
// source: generatedHeader followed by the goexperiment.simd build constraint
// and the archsimd package clause.
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd

package archsimd
`
162
// simdTypesTemplates holds the text/template definitions used by
// writeSIMDTypes to declare types:
//
//   - "sizeTmpl" is executed with a size in bits and emits the v<size> tag
//     struct for that size.
//   - "typeTmpl" is executed with a simdType and emits its doc comment
//     (worded differently for masks and for vectors) and its struct
//     declaration, whose body is the type's Fields.
const simdTypesTemplates = `
{{define "sizeTmpl"}}
// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD
type v{{.}} struct {
_{{.}} [0]func() // uncomparable
}
{{end}}

{{define "typeTmpl"}}
{{- if eq .Type "mask"}}
// {{.Name}} is a mask for a SIMD vector of {{.Lanes}} {{.ElemBits}}-bit elements.
{{- else}}
// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}s.
{{- end}}
type {{.Name}} struct {
{{.Fields}}
}

{{end}}
`
183
// simdFeaturesTemplate generates the CPU feature query API. It is executed
// by writeSIMDFeatures with a list of feature records and emits the
// X86Features type, the X86 variable, and one boolean method per record.
//
// For each record the template reads Feature, GoArch, FeatureVar, Virtual,
// Implies and ImpliesAll. A Virtual feature is reported as the conjunction of
// the cpu Has<dep> fields of its Implies entries; any other feature is
// reported from its own cpu Has<Feature> field. The "AVX512" feature gets a
// dedicated doc comment, and a non-empty ImpliesAll adds a sentence listing
// the features that are also supported.
const simdFeaturesTemplate = `
import "internal/cpu"

type X86Features struct {}

var X86 X86Features

{{range .}}
{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
// These five CPU features are bundled together, and no use of AVX-512
// is allowed unless all of these features are supported together.
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
{{- if ne .ImpliesAll ""}}
//
// If it returns true, then the CPU also supports {{.ImpliesAll}}.
{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
func ({{.FeatureVar}}Features) {{.Feature}}() bool {
{{- if .Virtual}}
return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
{{- else}}
return cpu.{{.FeatureVar}}.Has{{.Feature}}
{{- end}}
}
{{end}}
`
219
// simdLoadStoreTemplate is executed with a simdType and emits three
// declarations for it: the Len method, which returns the lane count, the
// Load<Name> function and the Store method. Load and Store operate on a
// pointer to an array of Lanes elements of type Base and are declared
// without bodies.
const simdLoadStoreTemplate = `
// Len returns the number of elements in {{.Article}} {{.Name}}.
func (x {{.Name}}) Len() int { return {{.Lanes}} }

// Load{{.Name}} loads {{.Article}} {{.Name}} from an array.
//
//go:noescape
func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}}

// Store stores {{.Article}} {{.Name}} to an array.
//
//go:noescape
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
234
// simdMaskFromValTemplate is executed with a mask simdType and emits the
// <Name>FromBits constructor and the ToBits method, which convert between
// the mask and a bitmap held in an unsigned integer of LanesContainer bits.
//
// When the mask has fewer lanes than the integer has bits, each doc comment
// notes that only the low Lanes bits are meaningful: of the argument y for
// FromBits, and of the result for ToBits (ToBits has no y parameter).
const simdMaskFromValTemplate = `
// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
{{- if ne .Lanes .LanesContainer}}
// Only the lower {{.Lanes}} bits of y are used.
{{- end}}
//
// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512
func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}

// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset.
{{- if ne .Lanes .LanesContainer}}
// Only the lower {{.Lanes}} bits of the result are used.
{{- end}}
//
{{.ToBitsDoc}}
func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
`
252
// simdMaskedLoadStoreTemplate is executed with a simdType and emits the
// LoadMasked<Name> function and the StoreMasked method. Both take a pointer
// to an array of Lanes elements of type Base plus a mask of type
// Mask<ElemBits>x<Lanes>; the instruction lines in their doc comments come
// from MaskedLoadDoc and MaskedStoreDoc.
const simdMaskedLoadStoreTemplate = `
// LoadMasked{{.Name}} loads {{.Article}} {{.Name}} from an array,
// at those elements enabled by mask.
//
{{.MaskedLoadDoc}}
//
//go:noescape
func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}}

// StoreMasked stores {{.Article}} {{.Name}} to an array,
// at those elements enabled by mask.
//
{{.MaskedStoreDoc}}
//
//go:noescape
func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}})
`
270
// simdStubsTmpl holds the named text/template definitions that
// writeSIMDStubs uses to emit body-less method declarations.
//
// The "op..." templates are executed with an operation. Each emits the
// operation's Documentation (when non-empty), an "Asm: ..., CPU Feature: ..."
// line, and a method signature. The templates differ in how many operands
// the method takes, in which operand becomes the receiver and in which order
// the remaining operands become parameters. The "...Imm8..." templates add a
// uint8 immediate parameter and a note about constant immediates, and the
// "...VecAsScalar" templates take the second operand as an unsigned integer
// scalar.
//
// "vectorConversion" is executed with a simdTypePair and emits the
// As<Tdst.Name> method on the source type. "mask" is executed with a mask
// simdType and emits its conversions to and from the vector counterpart,
// plus And and Or.
const simdStubsTmpl = `
{{define "op1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}}
{{end}}

{{define "op2"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_31Zero3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op2VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}}
{{end}}

{{define "op3VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}}
{{end}}

{{define "op4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_31"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op1Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8_II"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
// {{.ImmName}} should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_SHA1RNDS4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}


{{define "op4Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "vectorConversion"}}
// As{{.Tdst.Name}} returns {{.Tdst.Article}} {{.Tdst.Name}} with the same bit representation as x.
func (x {{.Tsrc.Name}}) As{{.Tdst.Name}}() {{.Tdst.Name}}
{{end}}

{{define "mask"}}
// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}.
func (from {{.Name}}) To{{.VectorCounterpart}}() (to {{.VectorCounterpart}})

// asMask converts from {{.VectorCounterpart}} to {{.Name}}.
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}})

func (x {{.Name}}) And(y {{.Name}}) {{.Name}}

func (x {{.Name}}) Or(y {{.Name}}) {{.Name}}
{{end}}
`
461
462
463
464 func parseSIMDTypes(ops []Operation) simdTypeMap {
465
466 ret := map[int][]simdType{}
467 seen := map[string]struct{}{}
468 processArg := func(arg Operand) {
469 if arg.Class == "immediate" || arg.Class == "greg" {
470
471 return
472 }
473 if _, ok := seen[*arg.Go]; ok {
474 return
475 }
476 seen[*arg.Go] = struct{}{}
477
478 lanes := *arg.Lanes
479 base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits)
480 tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes)
481 tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits)
482 valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base)
483 fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS)
484 if arg.Class == "mask" {
485 vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int")
486 reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32)
487 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits})
488
489 if _, ok := seen[vectorCounterpart]; !ok {
490 seen[vectorCounterpart] = struct{}{}
491 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits})
492 }
493 } else {
494 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits})
495 }
496 }
497 for _, op := range ops {
498 for _, arg := range op.In {
499 processArg(arg)
500 }
501 for _, arg := range op.Out {
502 processArg(arg)
503 }
504 }
505 return ret
506 }
507
508 func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair {
509 v := []simdTypePair{}
510 for _, ts := range typeMap {
511 for i, tsrc := range ts {
512 for j, tdst := range ts {
513 if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" &&
514 tsrc.Lanes > 1 && tdst.Lanes > 1 {
515 v = append(v, simdTypePair{tsrc, tdst})
516 }
517 }
518 }
519 }
520 slices.SortFunc(v, compareSimdTypePairs)
521 return v
522 }
523
524 func masksFromTypeMap(typeMap simdTypeMap) []simdType {
525 m := []simdType{}
526 for _, ts := range typeMap {
527 for _, tsrc := range ts {
528 if tsrc.Type == "mask" {
529 m = append(m, tsrc)
530 }
531 }
532 }
533 slices.SortFunc(m, compareSimdTypes)
534 return m
535 }
536
537 func typesFromTypeMap(typeMap simdTypeMap) []simdType {
538 m := []simdType{}
539 for _, ts := range typeMap {
540 for _, tsrc := range ts {
541 if tsrc.Lanes > 1 {
542 m = append(m, tsrc)
543 }
544 }
545 }
546 slices.SortFunc(m, compareSimdTypes)
547 return m
548 }
549
550
551 func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
552 t := templateOf(simdTypesTemplates, "types_amd64")
553 loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
554 maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
555 maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
556
557 buffer := new(bytes.Buffer)
558 buffer.WriteString(simdPackageHeader)
559
560 sizes := make([]int, 0, len(typeMap))
561 for size, types := range typeMap {
562 slices.SortFunc(types, compareSimdTypes)
563 sizes = append(sizes, size)
564 }
565 sort.Ints(sizes)
566
567 for _, size := range sizes {
568 if size <= 64 {
569
570 continue
571 }
572 if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil {
573 panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err))
574 }
575 for _, typeDef := range typeMap[size] {
576 if typeDef.Lanes == 1 {
577 continue
578 }
579 if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil {
580 panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err))
581 }
582 if typeDef.Type != "mask" {
583 if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil {
584 panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err))
585 }
586
587 if typeDef.MaskedLoadStoreFilter() {
588 if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil {
589 panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err))
590 }
591 }
592 } else {
593 if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
594 panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
595 }
596 }
597 }
598 }
599
600 return buffer
601 }
602
// goarchFeatures describes the CPU features known for one GOARCH.
// Instances are stored in goarchFeatureInfo via registerFeatureInfo.
type goarchFeatures struct {
	// featureVar is the architecture's identifier in generated code: the
	// features template uses it both as the prefix of the "<featureVar>Features"
	// receiver type and as the field of package cpu that holds the Has* flags.
	featureVar string

	// features maps a feature name to the information known about it.
	features map[string]featureInfo
}
611
// featureInfo describes one CPU feature of a GOARCH.
type featureInfo struct {
	// Implies lists the features directly implied by this one.
	// featureImplies follows these entries transitively to build the
	// "also supports" sentence in the generated documentation.
	Implies []string

	// Virtual selects how the generated predicate is computed: when true it
	// is the conjunction of the cpu Has<dep> fields for the entries in
	// Implies; when false it reads the feature's own cpu Has<Feature> field.
	Virtual bool
}
624
625
626
// goarchFeatureInfo maps a GOARCH name to its feature description.
// It is populated through registerFeatureInfo and read by featureImplies
// and writeSIMDFeatures.
var goarchFeatureInfo = make(map[string]goarchFeatures)
628
// registerFeatureInfo records features as the feature description for
// goArch in goarchFeatureInfo, replacing any description registered earlier
// for the same goArch.
func registerFeatureInfo(goArch string, features goarchFeatures) {
	goarchFeatureInfo[goArch] = features
}
632
633 func featureImplies(goarch string, base string) string {
634
635 var list []string
636 var visit func(f string)
637 visit = func(f string) {
638 list = append(list, f)
639 for _, dep := range goarchFeatureInfo[goarch].features[f].Implies {
640 visit(dep)
641 }
642 }
643 visit(base)
644
645 list = list[1:]
646
647 slices.Reverse(list)
648
649 switch len(list) {
650 case 0:
651 return ""
652 case 1:
653 return list[0]
654 case 2:
655 return list[0] + " and " + list[1]
656 default:
657 list[len(list)-1] = "and " + list[len(list)-1]
658 return strings.Join(list, ", ")
659 }
660 }
661
662 func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
663
664 type featureKey struct {
665 GoArch string
666 Feature string
667 }
668 featureSet := make(map[featureKey]struct{})
669 for _, op := range ops {
670
671
672 for feature := range strings.SplitSeq(op.CPUFeature, ",") {
673 feature = strings.TrimSpace(feature)
674 featureSet[featureKey{op.GoArch, feature}] = struct{}{}
675 }
676 }
677 featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
678 if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
679 return c
680 }
681 return compareNatural(a.Feature, b.Feature)
682 })
683
684
685
686
687 type feature struct {
688 featureKey
689 FeatureVar string
690 Virtual bool
691 Implies []string
692 ImpliesAll string
693 }
694 var features []feature
695 for _, k := range featureKeys {
696 featureVar := goarchFeatureInfo[k.GoArch].featureVar
697 fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
698 features = append(features, feature{
699 featureKey: k,
700 FeatureVar: featureVar,
701 Virtual: fi.Virtual,
702 Implies: fi.Implies,
703 ImpliesAll: featureImplies(k.GoArch, k.Feature),
704 })
705 }
706
707
708
709 t := templateOf(simdFeaturesTemplate, "features")
710
711 buffer := new(bytes.Buffer)
712 buffer.WriteString(simdPackageHeader)
713
714 if err := t.Execute(buffer, features); err != nil {
715 panic(fmt.Errorf("failed to execute features template: %w", err))
716 }
717
718 return buffer
719 }
720
721
722
723 func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
724 t := templateOf(simdStubsTmpl, "simdStubs")
725 f = new(bytes.Buffer)
726 fI = new(bytes.Buffer)
727 f.WriteString(simdPackageHeader)
728 fI.WriteString(simdPackageHeader)
729
730 slices.SortFunc(ops, compareOperations)
731
732 for i, op := range ops {
733 if op.NoTypes != nil && *op.NoTypes == "true" {
734 continue
735 }
736 if op.SkipMaskedMethod() {
737 continue
738 }
739 idxVecAsScalar, err := checkVecAsScalar(op)
740 if err != nil {
741 panic(err)
742 }
743 if s, op, err := classifyOp(op); err == nil {
744 if idxVecAsScalar != -1 {
745 if s == "op2" || s == "op3" {
746 s += "VecAsScalar"
747 } else {
748 panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize"))
749 }
750 }
751 if i == 0 || op.Go != ops[i-1].Go {
752 if unicode.IsUpper([]rune(op.Go)[0]) {
753 fmt.Fprintf(f, "\n/* %s */\n", op.Go)
754 } else {
755 fmt.Fprintf(fI, "\n/* %s */\n", op.Go)
756 }
757 }
758 if unicode.IsUpper([]rune(op.Go)[0]) {
759 if err := t.ExecuteTemplate(f, s, op); err != nil {
760 panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
761 }
762 } else {
763 if err := t.ExecuteTemplate(fI, s, op); err != nil {
764 panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
765 }
766 }
767 } else {
768 panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err))
769 }
770 }
771
772 vectorConversions := vConvertFromTypeMap(typeMap)
773 for _, conv := range vectorConversions {
774 if err := t.ExecuteTemplate(f, "vectorConversion", conv); err != nil {
775 panic(fmt.Errorf("failed to execute vectorConversion template: %w", err))
776 }
777 }
778
779 masks := masksFromTypeMap(typeMap)
780 for _, mask := range masks {
781 if err := t.ExecuteTemplate(f, "mask", mask); err != nil {
782 panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err))
783 }
784 }
785
786 return
787 }
788
View as plain text