// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "textflag.h" #define PosInf 0x7FF0000000000000 #define NegInf 0xFFF0000000000000 #define NegZero 0x8000000000000000 #define Pi 0x400921FB54442D18 #define NegPi 0xC00921FB54442D18 #define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4 #define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4 #define PiDiv4 0x3FE921FB54442D18 // Pi/4 #define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4 // Minimax polynomial coefficients and other constants DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00 DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00 DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00 DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00 DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01 DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01 DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01 DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01 DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01 DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01 DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01 DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01 DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01 DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01 DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02 DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04 DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02 DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03 DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03 DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01 GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 GLOBL ·atan2xpim<> + 0(SB), RODATA, $8 // Atan2 returns the arc tangent of y/x, using // the signs of the two to determine the quadrant // of the return value. // // Special cases are (in order): // Atan2(y, NaN) = NaN // Atan2(NaN, x) = NaN // Atan2(+0, x>=0) = +0 // Atan2(-0, x>=0) = -0 // Atan2(+0, x<=-0) = +Pi // Atan2(-0, x<=-0) = -Pi // Atan2(y>0, 0) = +Pi/2 // Atan2(y<0, 0) = -Pi/2 // Atan2(+Inf, +Inf) = +Pi/4 // Atan2(-Inf, +Inf) = -Pi/4 // Atan2(+Inf, -Inf) = 3Pi/4 // Atan2(-Inf, -Inf) = -3Pi/4 // Atan2(y, +Inf) = 0 // Atan2(y>0, -Inf) = +Pi // Atan2(y<0, -Inf) = -Pi // Atan2(+Inf, x) = +Pi/2 // Atan2(-Inf, x) = -Pi/2 // The algorithm used is minimax polynomial approximation // with coefficients determined with a Remez exchange algorithm. TEXT ·atan2Asm(SB), NOSPLIT, $0-24 // special case MOVD x+0(FP), R1 MOVD y+8(FP), R2 // special case Atan2(NaN, y) = NaN MOVD $~(1<<63), R5 AND R1, R5 // x = |x| MOVD $PosInf, R3 CMPUBLT R3, R5, returnX // special case Atan2(x, NaN) = NaN MOVD $~(1<<63), R5 AND R2, R5 CMPUBLT R3, R5, returnY MOVD $NegZero, R3 CMPUBEQ R3, R1, xIsNegZero MOVD $0, R3 CMPUBEQ R3, R1, xIsPosZero MOVD $PosInf, R4 CMPUBEQ R4, R2, yIsPosInf MOVD $NegInf, R4 CMPUBEQ R4, R2, yIsNegInf BR Normal xIsNegZero: // special case Atan(-0, y>=0) = -0 MOVD $0, R4 CMPBLE R4, R2, returnX //special case Atan2(-0, y<=-0) = -Pi MOVD $NegZero, R4 CMPBGE R4, R2, returnNegPi BR Normal xIsPosZero: //special case Atan2(0, 0) = 0 MOVD $0, R4 CMPUBEQ R4, R2, returnX //special case Atan2(0, y<=-0) = Pi MOVD $NegZero, R4 CMPBGE R4, R2, returnPi BR Normal yIsNegInf: //special case Atan2(+Inf, -Inf) = 3Pi/4 MOVD $PosInf, R3 CMPUBEQ R3, R1, posInfNegInf //special case Atan2(-Inf, -Inf) = -3Pi/4 MOVD $NegInf, R3 CMPUBEQ R3, R1, negInfNegInf BR Normal yIsPosInf: //special case Atan2(+Inf, +Inf) = Pi/4 MOVD $PosInf, R3 CMPUBEQ R3, R1, posInfPosInf //special case Atan2(-Inf, +Inf) = -Pi/4 MOVD $NegInf, R3 CMPUBEQ R3, R1, negInfPosInf //special case Atan2(x, +Inf) = Copysign(0, x) CMPBLT R1, $0, returnNegZero BR returnPosZero Normal: FMOVD x+0(FP), F0 FMOVD y+8(FP), F2 MOVD $·atan2rodataL25<>+0(SB), R9 LGDR F0, R2 LGDR F2, R1 RISBGNZ $32, $63, $32, R2, R2 RISBGNZ $32, $63, $32, R1, R1 WORD $0xB9170032 //llgtr %r3,%r2 RISBGZ $63, $63, $33, R2, R5 WORD $0xB9170041 //llgtr %r4,%r1 WFLCDB V0, V20 MOVW R4, R6 MOVW R3, R7 CMPUBLT R6, R7, L17 WFDDB V2, V0, V3 ADDW $2, R5, R2 MOVW R4, R6 MOVW R3, R7 CMPUBLE R6, R7, L20 L3: WFMDB V3, V3, V4 VLEG $0, 152(R9), V18 VLEG $0, 144(R9), V16 FMOVD 136(R9), F1 FMOVD 128(R9), F5 FMOVD 120(R9), F6 WFMADB V4, V16, V5, V16 WFMADB V4, V6, V1, V6 FMOVD 112(R9), F7 WFMDB V4, V4, V1 WFMADB V4, V7, V18, V7 VLEG $0, 104(R9), V18 WFMADB V1, V6, V16, V6 CMPWU R4, R3 FMOVD 96(R9), F5 VLEG $0, 88(R9), V16 WFMADB V4, V5, V18, V5 VLEG $0, 80(R9), V18 VLEG $0, 72(R9), V22 WFMADB V4, V16, V18, V16 VLEG $0, 64(R9), V18 WFMADB V1, V7, V5, V7 WFMADB V4, V18, V22, V18 WFMDB V1, V1, V5 WFMADB V1, V16, V18, V16 VLEG $0, 56(R9), V18 WFMADB V5, V6, V7, V6 VLEG $0, 48(R9), V22 FMOVD 40(R9), F7 WFMADB V4, V7, V18, V7 VLEG $0, 32(R9), V18 WFMADB V5, V6, V16, V6 WFMADB V4, V18, V22, V18 VLEG $0, 24(R9), V16 WFMADB V1, V7, V18, V7 VLEG $0, 16(R9), V18 VLEG $0, 8(R9), V22 WFMADB V4, V18, V16, V18 VLEG $0, 0(R9), V16 WFMADB V5, V6, V7, V6 WFMADB V4, V16, V22, V16 FMUL F3, F4 WFMADB V1, V18, V16, V1 FMADD F6, F5, F1 WFMADB V4, V1, V3, V4 BLT L18 BGT L7 LTDBR F2, F2 BLTU L21 L8: LTDBR F0, F0 BLTU L22 L9: WFCHDBS V2, V0, V0 BNE L18 L7: MOVW R1, R6 CMPBGE R6, $0, L1 L18: RISBGZ $58, $60, $3, R2, R2 MOVD $·atan2xpi2h<>+0(SB), R1 MOVD ·atan2xpim<>+0(SB), R3 LDGR R3, F0 WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1) BYTE $0x40 BYTE $0x1E L1: FMOVD F4, ret+16(FP) RET L20: LTDBR F2, F2 BLTU L23 FMOVD F2, F6 L4: LTDBR F0, F0 BLTU L24 FMOVD F0, F4 L5: WFCHDBS V6, V4, V4 BEQ L3 L17: WFDDB V0, V2, V4 BYTE $0x18 //lr %r2,%r5 BYTE $0x25 WORD $0xB3130034 //lcdbr %f3,%f4 BR L3 L23: WORD $0xB3130062 //lcdbr %f6,%f2 BR L4 L22: VLR V20, V0 BR L9 L21: WORD $0xB3130022 //lcdbr %f2,%f2 BR L8 L24: VLR V20, V4 BR L5 returnX: //the result is same as the first argument MOVD R1, ret+16(FP) RET returnY: //the result is same as the second argument MOVD R2, ret+16(FP) RET returnPi: MOVD $Pi, R1 MOVD R1, ret+16(FP) RET returnNegPi: MOVD $NegPi, R1 MOVD R1, ret+16(FP) RET posInfNegInf: MOVD $Pi3Div4, R1 MOVD R1, ret+16(FP) RET negInfNegInf: MOVD $NegPi3Div4, R1 MOVD R1, ret+16(FP) RET posInfPosInf: MOVD $PiDiv4, R1 MOVD R1, ret+16(FP) RET negInfPosInf: MOVD $NegPiDiv4, R1 MOVD R1, ret+16(FP) RET returnNegZero: MOVD $NegZero, R1 MOVD R1, ret+16(FP) RET returnPosZero: MOVD $0, ret+16(FP) RET