// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

import (
	"math/bits"
	"unsafe"
)

// NOTE: This is an asmcheck test. Each comment of the form
//   arch:"REGEX"  or  arch/variant:"REGEX"
// inside a function asserts that the assembly the compiler generates for
// that function on that architecture matches REGEX; a leading '-' before
// the quoted pattern asserts that the pattern does NOT appear. The
// function bodies are deliberate code patterns — do not restyle them, or
// the instruction-selection checks below will no longer apply.

// ----------------------- //
//   bits.LeadingZeros     //
// ----------------------- //

func LeadingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV",-"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"SUB"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros(n)
}

func LeadingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// arm:"CLZ"
	// arm64:"CLZ"
	// loong64:"CLZV",-"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros64(n)
}

func LeadingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// arm:"CLZ"
	// arm64:"CLZW"
	// loong64:"CLZW",-"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZW"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZW",-"ADDI"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros32(n)
}

func LeadingZeros16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-48",-"NEG"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros16(n)
}

func LeadingZeros8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-56",-"NEG"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros8(n)
}

// --------------- //
//   bits.Len*     //
// --------------- //

func Len(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC","CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len(n)
}

func Len64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC","CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len64(n)
}

func SubFromLen64(n uint64) int {
	// loong64:"CLZV",-"ADD"
	// ppc64x:"CNTLZD",-"SUBC"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI",-"NEG"
	return 64 - bits.Len64(n)
}

func CompareWithLen64(n uint64) bool {
	// loong64:"CLZV",-"ADD",-"[$]64",-"[$]9"
	return bits.Len64(n) < 9
}

func Len32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZW"
	// mips:"CLZ"
	// ppc64x: "CNTLZW"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZW","ADDI\t\\$-32"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len32(n)
}

func Len16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC","CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len16(n)
}

func Len8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC","CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len8(n)
}

// -------------------- //
//   bits.OnesCount     //
// -------------------- //

// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
func OnesCount(n uint) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTV"
	// ppc64x:"POPCNTD"
	// riscv64:"CPOP\t"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}

func OnesCount64(n uint64) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTV"
	// ppc64x:"POPCNTD"
	// riscv64:"CPOP\t"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}

func OnesCount32(n uint32) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTW"
	// ppc64x:"POPCNTW"
	// riscv64:"CPOPW"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}

func OnesCount16(n uint16) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTH"
	// ppc64x:"POPCNTW"
	// riscv64:"CPOP\t"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}

func OnesCount8(n uint8) int {
	// ppc64x:"POPCNTB"
	// riscv64/rva22u64,riscv64/rva23u64:"CPOP\t"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}

// ------------------ //
//   bits.Reverse     //
// ------------------ //

func Reverse(n uint) uint {
	// loong64:"BITREVV"
	return bits.Reverse(n)
}

func Reverse64(n uint64) uint64 {
	// loong64:"BITREVV"
	return bits.Reverse64(n)
}

func Reverse32(n uint32) uint32 {
	// loong64:"BITREVW"
	return bits.Reverse32(n)
}

func Reverse16(n uint16) uint16 {
	// loong64:"BITREV4B","REVB2H"
	return bits.Reverse16(n)
}

func Reverse8(n uint8) uint8 {
	// loong64:"BITREV4B"
	return bits.Reverse8(n)
}

// ----------------------- //
//   bits.ReverseBytes     //
// ----------------------- //

func ReverseBytes(n uint) uint {
	// 386:"BSWAPL"
	// amd64:"BSWAPQ"
	// arm64:"REV"
	// loong64:"REVBV"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
	// s390x:"MOVDBR"
	return bits.ReverseBytes(n)
}

func ReverseBytes64(n uint64) uint64 {
	// 386:"BSWAPL"
	// amd64:"BSWAPQ"
	// arm64:"REV"
	// loong64:"REVBV"
	// ppc64x/power10: "BRD"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
	// s390x:"MOVDBR"
	return bits.ReverseBytes64(n)
}

func ReverseBytes32(n uint32) uint32 {
	// 386:"BSWAPL"
	// amd64:"BSWAPL"
	// arm64:"REVW"
	// loong64:"REVB2W"
	// ppc64x/power10: "BRW"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$32"
	// s390x:"MOVWBR"
	return bits.ReverseBytes32(n)
}

func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm/5:"SLL","SRL","ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	// arm64:"REV16W",-"UBFX",-"ORR"
	// loong64:"REVB2H"
	// ppc64x/power10: "BRH"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$48"
	return bits.ReverseBytes16(n)
}

// --------------------- //
//   bits.RotateLeft     //
// --------------------- //

func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"RORI"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}

func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW\tR[0-9]+@>23`
	// arm64:"RORW"
	// loong64:"ROTR\t"
	// ppc64x:"ROTLW"
	// riscv64:"RORIW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}

func RotateLeft16(n uint16, s int) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	// arm64:"RORW",-"CSEL"
	// loong64:"ROTR\t","SLLV"
	return bits.RotateLeft16(n, s)
}

func RotateLeft8(n uint8, s int) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	// arm64:"LSL","LSR",-"CSEL"
	// loong64:"OR","SLLV","SRLV"
	return bits.RotateLeft8(n, s)
}

func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}

func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}

func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// loong64:"ROTR\t"
	// ppc64x:"ROTLW"
	// riscv64:"ROLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}

// ------------------------ //
//   bits.TrailingZeros     //
// ------------------------ //

func TrailingZeros(n uint) int {
	// 386:"BSFL"
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm:"CLZ"
	// arm64:"RBIT","CLZ"
	// loong64:"CTZV"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
	// s390x:"FLOGR"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}

func TrailingZeros64(n uint64) int {
	// 386:"BSFL","JNE"
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm64:"RBIT","CLZ"
	// loong64:"CTZV"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
	// s390x:"FLOGR"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}

func TrailingZeros64Subtract(n uint64) int {
	// ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD"
	// ppc64x/power9:"SUBC","CNTTZD"
	return bits.TrailingZeros64(1 - n)
}

func TrailingZeros32(n uint32) int {
	// 386:"BSFL"
	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
	// amd64/v3:"TZCNTL"
	// arm:"CLZ"
	// arm64:"RBITW","CLZW"
	// loong64:"CTZW"
	// ppc64x/power8:"ANDN","POPCNTW"
	// ppc64x/power9: "CNTTZW"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZW"
	// s390x:"FLOGR","MOVWZ"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}

func TrailingZeros16(n uint16) int {
	// 386:"BSFL\t"
	// amd64:"BSFL","ORL\\t\\$65536"
	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
	// loong64:"CTZV"
	// ppc64x/power8:"POPCNTW","ADD\t\\$-1"
	// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
	// riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$65536","CTZW"
	// s390x:"FLOGR","OR\t\\$65536"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}

func TrailingZeros8(n uint8) int {
	// 386:"BSFL"
	// amd64:"BSFL","ORL\\t\\$256"
	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
	// loong64:"CTZV"
	// ppc64x/power8:"POPCNTB","ADD\t\\$-1"
	// ppc64x/power9:"CNTTZD","OR\t\\$256"
	// riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$256","CTZW"
	// s390x:"FLOGR","OR\t\\$256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}

// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.

func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros(n)
		n &= n - 1
	}
	return i
}

func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
		i += bits.TrailingZeros64(n)
		n &= n - 1
	}
	return i
}

func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
		// amd64/v3:"TZCNTL"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
		i += bits.TrailingZeros32(n)
		n &= n - 1
	}
	return i
}

func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
		i += bits.TrailingZeros16(n)
		n &= n - 1
	}
	return i
}

func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
		i += bits.TrailingZeros8(n)
		n &= n - 1
	}
	return i
}

// --------------- //
//   bits.Add*     //
// --------------- //

func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, ci)
}

func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, 7, ci)
}

func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, 0)
}

func AddR(x, y, ci uint) uint {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add(x, y, ci)
	return r
}

func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}

func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, ci)
}

func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, 7, ci)
}

func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, 0)
}

func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add64(x, y, ci)
	return r
}

func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}

func Add64M0(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], 0)
	// ppc64x: -"ADDC", -"ADDE", "ADDZE\tR[1-9]"
	r[1], c = bits.Add64(p[1], 0, c)
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	r[2], c = bits.Add64(p[2], p[2], c)
}

func Add64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x: "ADDC\tR", "ADDZE"
	r[0], c[0] = bits.Add64(p[0], q[0], 0)
	// ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE"
	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
}

func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Verify independent carry chain operations are scheduled efficiently
// and do not cause unnecessary save/restore of the CA bit.
//
// This is an example of why CarryChainTail priority must be lower
// (earlier in the block) than Memory. f[0]=f1 could be scheduled
// after the first two lower 64 bit limb adds, but before either
// high 64 bit limbs are added.
//
// This is what happened on PPC64 when compiling
// crypto/internal/edwards25519/field.feMulGeneric.
func Add64MultipleChains(a, b, c, d [2]uint64) {
	var cx, d1, d2 uint64
	a1, a2 := a[0], a[1]
	b1, b2 := b[0], b[1]
	c1, c2 := c[0], c[1]

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(a1, b1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(a2, b2, cx)

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(c1, d1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(c2, d2, cx)
	d[0] = d1
	d[1] = d2
}

// --------------- //
//   bits.Sub*     //
// --------------- //

func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, ci)
}

func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, 7, ci)
}

func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, 0)
}

func SubR(x, y, ci uint) uint {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub(x, y, ci)
	return r
}
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}

func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, ci)
}

func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, 7, ci)
}

func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, 0)
}

func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub64(x, y, ci)
	return r
}
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}

func Sub64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
	// ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
}

func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// --------------- //
//   bits.Mul*     //
// --------------- //

func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul(x, y)
}

func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul64(x, y)
}

func Mul64HiOnly(x, y uint64) uint64 {
	// arm64:"UMULH",-"MUL"
	// riscv64:"MULHU",-"MUL\t"
	hi, _ := bits.Mul64(x, y)
	return hi
}

func Mul64LoOnly(x, y uint64) uint64 {
	// arm64:"MUL",-"UMULH"
	// riscv64:"MUL\t",-"MULHU"
	_, lo := bits.Mul64(x, y)
	return lo
}

func Mul64Const() (uint64, uint64) {
	// 7133701809754865664 == 99<<56
	// arm64:"MOVD\t[$]7133701809754865664, R1", "MOVD\t[$]88, R0"
	return bits.Mul64(99+88<<8, 1<<56)
}

func MulUintOverflow(p *uint64) []uint64 {
	// arm64:"CMP\t[$]72"
	return unsafe.Slice(p, 9)
}

// --------------- //
//   bits.Div*     //
// --------------- //

func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}

func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR","UDIV","MSUB",-"UREM"
	return bits.Div32(hi, lo, x)
}

func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}

func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}