// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants.
//
// cbrtAsm keeps the address of this 72-byte table in R9 and reads each
// 8-byte entry by its byte offset:
//   0..40  loaded into floating-point registers and combined with
//          multiply-add instructions (the polynomial coefficients)
//   48     1.000244140625 (1 + 2^-12)
//   56     1/3
//   64     2^96, the factor applied to arguments whose exponent field is zero
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72

// Index tables.
//
// All three tables are consulted by cbrtAsm while it assembles the top
// 16 bits of its initial floating-point estimate.

// cbrttab32069: 80 bytes, read one byte at a time (zero-extended).
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80

// cbrttab22068: 128 bytes, read as 16-bit halfwords at an even byte
// offset and added to the halfword fetched from cbrttab12067.
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128

// cbrttab12067: 128 bytes, read as 16-bit halfwords at an even byte
// offset. Entries in the second 64 bytes have their top bit set.
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128

// Cbrt returns the cube root of the argument.
//
// Special cases are:
//	Cbrt(±0) = ±0
//	Cbrt(±Inf) = ±Inf
//	Cbrt(NaN) = NaN
//
// The algorithm used is a minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
	// No local frame ($0); 16 bytes of arguments and results:
	// the 8-byte input x at x+0(FP) and the 8-byte result at ret+8(FP).
	//
	// Register roles:
	//   F0  x on entry, the value stored to ret on every exit path
	//   R9  base address of cbrtrodataL9
	//   R2  high 32 bits of the argument (of x*2^96 on the L2 path)
	//   R4  adjustment added to the initial estimate: 0x200 on the L2 path, 0 on the L8 path
	//
	// Inline formulas below assume the destination is the last operand;
	// cN denotes the constant at N(R9).
	FMOVD	x+0(FP), F0
	MOVD	$·cbrtrodataL9<>+0(SB), R9
	LGDR	F0, R2	// R2 = raw 64-bit pattern of x

	// Classify x by its high word with the sign bit cleared.
	WORD	$0xC039000F	//iilf	%r3,1048575 (R3 = 0x000FFFFF)
	BYTE	$0xFF
	BYTE	$0xFF
	SRAD	$32, R2	// R2 = high 32 bits of x
	WORD	$0xB9170012	//llgtr	%r1,%r2 (R1 = R2 with only the low 31 bits kept)
	MOVW	R1, R6
	MOVW	R3, R7
	CMPBLE	R6, R7, L2	// exponent field is zero: x is ±0 or subnormal
	WORD	$0xC0397FEF	//iilf	%r3,2146435071 (R3 = 0x7FEFFFFF)
	BYTE	$0xFF
	BYTE	$0xFF
	MOVW	R3, R7
	CMPBLE	R6, R7, L8	// finite argument with a non-zero exponent field
	// Fall through: exponent field is all ones (±Inf or NaN).
L1:
	// Return x unchanged.
	FMOVD	F0, ret+8(FP)
	RET
L3:	// not referenced by any branch in this function
L2:
	LTDBR	F0, F0	// set the condition code from x
	BEQ	L1	// x == 0: return it as is, sign included
	// Non-zero with a zero exponent field: take the index bits from x*2^96 instead.
	FMOVD	F0, F2
	WORD	$0xED209040	//mdb	%f2,.L10-.L9(%r9) (F2 *= constant at 64(R9), i.e. 2^96)
	BYTE	$0x00
	BYTE	$0x1C
	// 0x200 lands in the exponent field of the estimate once it is shifted into place below.
	// NOTE(review): appears to compensate for the 2^96 rescale (+32 in the exponent) — confirm.
	MOVH	$0x200, R4
	LGDR	F2, R2
	SRAD	$32, R2	// R2 = high 32 bits of the rescaled value
L4:
	// Build the top 16 bits of the initial estimate from three table lookups.
	// NOTE(review): by the rotate/select operands, the first two indexes are taken
	// from the sign + upper exponent bits and the lower exponent bits of R2 — confirm.
	RISBGZ	$57, $62, $39, R2, R3	// R3 = even byte offset into cbrttab12067
	MOVD	$·cbrttab12067<>+0(SB), R1
	WORD	$0x48131000	//lh	%r1,0(%r3,%r1) (R1 = halfword from cbrttab12067)
	RISBGZ	$57, $62, $45, R2, R3	// R3 = even byte offset into cbrttab22068
	MOVD	$·cbrttab22068<>+0(SB), R5
	RISBGNZ	$60, $63, $48, R2, R2	// R2 = 4-bit field taken from R2
	WORD	$0x4A135000	//ah	%r1,0(%r3,%r5) (R1 += halfword from cbrttab22068)
	BYTE	$0x18	//lr	%r3,%r1 (R3 = sum of the two halfwords)
	BYTE	$0x31
	MOVD	$·cbrttab32069<>+0(SB), R1
	FMOVD	56(R9), F1	// F1 = 1/3
	FMOVD	48(R9), F5	// F5 = 1.000244140625
	WORD	$0xEC23393B	//rosbg	%r2,%r3,57,59,4 (OR three bits derived from R3 into the index in R2)
	BYTE	$0x04
	BYTE	$0x56
	WORD	$0xE3121000	//llc	%r1,0(%r2,%r1) (R1 = byte from cbrttab32069)
	BYTE	$0x00
	BYTE	$0x94
	ADDW	R3, R1	// add the halfword sum
	ADDW	R4, R1	// add the path-dependent adjustment
	SLW	$16, R1, R1
	SLD	$32, R1, R1	// low 16 bits of the sum now occupy bits 48-63
	LDGR	R1, F2	// F2 = y, the initial estimate

	// Refine the estimate and evaluate the polynomial. The instruction order
	// interleaves constant loads with arithmetic; do not reorder casually.
	// NOTE(review): the arithmetic is consistent with y approximating x^(-1/3)
	// and the result being x*y^2 times (1 + polynomial) — confirm.
	WFMDB	V2, V2, V4	// V4 = y*y
	WFMDB	V4, V0, V6	// V6 = y^2 * x
	WFMSDB	V4, V6, V2, V4	// V4 = y^4*x - y
	FMOVD	40(R9), F6	// F6 = c40
	FMSUB	F1, F4, F2	// F2 = (1/3)*(y^4*x - y) - y
	FMOVD	32(R9), F4	// F4 = c32
	WFMDB	V2, V2, V3	// V3 = F2^2
	FMOVD	24(R9), F1	// F1 = c24
	FMUL	F3, F0	// F0 = x * F2^2
	FMOVD	16(R9), F3	// F3 = c16
	WFMADB	V2, V0, V5, V2	// V2 = r = F2*F0 + 1.000244140625
	FMOVD	8(R9), F5	// F5 = c8
	FMADD	F6, F2, F4	// F4 = c40*r + c32
	WFMADB	V2, V1, V3, V1	// V1 = c24*r + c16
	WFMDB	V2, V2, V6	// V6 = r^2
	FMOVD	0(R9), F3	// F3 = c0
	WFMADB	V4, V6, V1, V4	// V4 = (c40*r + c32)*r^2 + (c24*r + c16)
	WFMADB	V2, V5, V3, V2	// V2 = c8*r + c0
	FMADD	F4, F6, F2	// F2 = V4*r^2 + (c8*r + c0)
	FMADD	F2, F0, F0	// F0 = F0*F2 + F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	// Finite argument with a non-zero exponent field: no rescale, so no adjustment.
	MOVH	$0x0, R4
	BR	L4