代码拉取完成,页面将自动刷新
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
// genLt_F64 emits the routine Lt_AVX2_F64, which stores the byte
// x[i] = (y[i] < z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 1 (less-than, ordered) used by VCMPPD in the block
// loop. The previous tail used SETCS, which stores 1 for an unordered compare
// and therefore disagreed with the block loop for NaN operands.
func genLt_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataLtF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Lt_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB0_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB0_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB0_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB0_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 4-lane compares, each narrowed to four bytes, then
	// merged into one 16-byte store.
	Label("LBB0_4")
	VMOVUPD(yBlk, Y1)
	VMOVUPD(yBlk.Offset(32), Y2)
	VMOVUPD(yBlk.Offset(64), Y3)
	VMOVUPD(yBlk.Offset(96), Y4)
	VCMPPD(Imm(1), zBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(1), zBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(1), zBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(1), zBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB0_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB0_7"))

	// Scalar tail. X0 holds z[i] and is compared against y[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. z[i] > y[i]. An unordered compare sets
	// CF and ZF, so NaN operands store 0.
	Label("LBB0_6")
	VMOVSD(zOne, X0)
	VUCOMISD(yOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB0_6"))

	Label("LBB0_7")
	VZEROUPPER()
	RET()
}
// genLt_F32 emits the routine Lt_AVX2_F32, which stores the byte
// x[i] = (y[i] < z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 1 (less-than, ordered) used by VCMPPS in the block
// loop. The previous tail used SETCS, which stores 1 for an unordered compare
// and therefore disagreed with the block loop for NaN operands.
func genLt_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataLtF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Lt_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB1_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB1_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB1_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB1_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 8-lane compares, each narrowed to eight bytes, then
	// merged into one 32-byte store.
	Label("LBB1_4")
	VMOVUPS(yBlk, Y1)
	VMOVUPS(yBlk.Offset(32), Y2)
	VMOVUPS(yBlk.Offset(64), Y3)
	VMOVUPS(yBlk.Offset(96), Y4)
	VCMPPS(Imm(1), zBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(1), zBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(1), zBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(1), zBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB1_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB1_7"))

	// Scalar tail. X0 holds z[i] and is compared against y[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. z[i] > y[i]. An unordered compare sets
	// CF and ZF, so NaN operands store 0.
	Label("LBB1_6")
	VMOVSS(zOne, X0)
	VUCOMISS(yOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB1_6"))

	Label("LBB1_7")
	VZEROUPPER()
	RET()
}
// genLte_F64 emits the routine Lte_AVX2_F64, which stores the byte
// x[i] = (y[i] <= z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 2 (less-or-equal, ordered) used by VCMPPD in the
// block loop. The previous tail used SETLS, which stores 1 for an unordered
// compare and therefore disagreed with the block loop for NaN operands.
func genLte_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataLteF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Lte_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB2_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB2_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB2_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB2_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 4-lane compares, each narrowed to four bytes, then
	// merged into one 16-byte store.
	Label("LBB2_4")
	VMOVUPD(yBlk, Y1)
	VMOVUPD(yBlk.Offset(32), Y2)
	VMOVUPD(yBlk.Offset(64), Y3)
	VMOVUPD(yBlk.Offset(96), Y4)
	VCMPPD(Imm(2), zBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(2), zBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(2), zBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(2), zBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB2_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB2_7"))

	// Scalar tail. X0 holds z[i] and is compared against y[i]; SETCC stores 1
	// only when CF=0, i.e. z[i] >= y[i]. An unordered compare sets CF, so NaN
	// operands store 0.
	Label("LBB2_6")
	VMOVSD(zOne, X0)
	VUCOMISD(yOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB2_6"))

	Label("LBB2_7")
	VZEROUPPER()
	RET()
}
// genLte_F32 emits the routine Lte_AVX2_F32, which stores the byte
// x[i] = (y[i] <= z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 2 (less-or-equal, ordered) used by VCMPPS in the
// block loop. The previous tail used SETLS, which stores 1 for an unordered
// compare and therefore disagreed with the block loop for NaN operands.
func genLte_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataLteF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Lte_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB3_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB3_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB3_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB3_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 8-lane compares, each narrowed to eight bytes, then
	// merged into one 32-byte store.
	Label("LBB3_4")
	VMOVUPS(yBlk, Y1)
	VMOVUPS(yBlk.Offset(32), Y2)
	VMOVUPS(yBlk.Offset(64), Y3)
	VMOVUPS(yBlk.Offset(96), Y4)
	VCMPPS(Imm(2), zBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(2), zBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(2), zBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(2), zBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB3_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB3_7"))

	// Scalar tail. X0 holds z[i] and is compared against y[i]; SETCC stores 1
	// only when CF=0, i.e. z[i] >= y[i]. An unordered compare sets CF, so NaN
	// operands store 0.
	Label("LBB3_6")
	VMOVSS(zOne, X0)
	VUCOMISS(yOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB3_6"))

	Label("LBB3_7")
	VZEROUPPER()
	RET()
}
// genGt_F64 emits the routine Gt_AVX2_F64, which stores the byte
// x[i] = (y[i] > z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
func genGt_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataGtF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Gt_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB4_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB4_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB4_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB4_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: z is loaded and tested as "z < y" (predicate 1), which is
	// the same as y > z. Each 4-lane result is narrowed to four bytes and the
	// four groups are merged into one 16-byte store.
	Label("LBB4_4")
	VMOVUPD(zBlk, Y1)
	VMOVUPD(zBlk.Offset(32), Y2)
	VMOVUPD(zBlk.Offset(64), Y3)
	VMOVUPD(zBlk.Offset(96), Y4)
	VCMPPD(Imm(1), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(1), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(1), yBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(1), yBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB4_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB4_7"))

	// Scalar tail. X0 holds y[i] and is compared against z[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. y[i] > z[i].
	Label("LBB4_6")
	VMOVSD(yOne, X0)
	VUCOMISD(zOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB4_6"))

	Label("LBB4_7")
	VZEROUPPER()
	RET()
}
// genGt_F32 emits the routine Gt_AVX2_F32, which stores the byte
// x[i] = (y[i] > z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
func genGt_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataGtF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Gt_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB5_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB5_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB5_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB5_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: z is loaded and tested as "z < y" (predicate 1), which is
	// the same as y > z. Each 8-lane result is narrowed to eight bytes and the
	// four groups are merged into one 32-byte store.
	Label("LBB5_4")
	VMOVUPS(zBlk, Y1)
	VMOVUPS(zBlk.Offset(32), Y2)
	VMOVUPS(zBlk.Offset(64), Y3)
	VMOVUPS(zBlk.Offset(96), Y4)
	VCMPPS(Imm(1), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(1), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(1), yBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(1), yBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB5_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB5_7"))

	// Scalar tail. X0 holds y[i] and is compared against z[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. y[i] > z[i].
	Label("LBB5_6")
	VMOVSS(yOne, X0)
	VUCOMISS(zOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB5_6"))

	Label("LBB5_7")
	VZEROUPPER()
	RET()
}
// genGte_F64 emits the routine Gte_AVX2_F64, which stores the byte
// x[i] = (y[i] >= z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
func genGte_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataGteF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Gte_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB6_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB6_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB6_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB6_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: z is loaded and tested as "z <= y" (predicate 2), which is
	// the same as y >= z. Each 4-lane result is narrowed to four bytes and the
	// four groups are merged into one 16-byte store.
	Label("LBB6_4")
	VMOVUPD(zBlk, Y1)
	VMOVUPD(zBlk.Offset(32), Y2)
	VMOVUPD(zBlk.Offset(64), Y3)
	VMOVUPD(zBlk.Offset(96), Y4)
	VCMPPD(Imm(2), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(2), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(2), yBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(2), yBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB6_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB6_7"))

	// Scalar tail. X0 holds y[i] and is compared against z[i]; SETCC stores 1
	// only when CF=0, i.e. y[i] >= z[i].
	Label("LBB6_6")
	VMOVSD(yOne, X0)
	VUCOMISD(zOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB6_6"))

	Label("LBB6_7")
	VZEROUPPER()
	RET()
}
// genGte_F32 emits the routine Gte_AVX2_F32, which stores the byte
// x[i] = (y[i] >= z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
func genGte_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataGteF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Gte_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB7_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB7_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB7_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB7_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: z is loaded and tested as "z <= y" (predicate 2), which is
	// the same as y >= z. Each 8-lane result is narrowed to eight bytes and
	// the four groups are merged into one 32-byte store.
	Label("LBB7_4")
	VMOVUPS(zBlk, Y1)
	VMOVUPS(zBlk.Offset(32), Y2)
	VMOVUPS(zBlk.Offset(64), Y3)
	VMOVUPS(zBlk.Offset(96), Y4)
	VCMPPS(Imm(2), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(2), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(2), yBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(2), yBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB7_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB7_7"))

	// Scalar tail. X0 holds y[i] and is compared against z[i]; SETCC stores 1
	// only when CF=0, i.e. y[i] >= z[i].
	Label("LBB7_6")
	VMOVSS(yOne, X0)
	VUCOMISS(zOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB7_6"))

	Label("LBB7_7")
	VZEROUPPER()
	RET()
}
// genEq_F64 emits the routine Eq_AVX2_F64, which stores the byte
// x[i] = (y[i] == z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 0 (equal, ordered) used by VCMPPD in the block loop.
// The previous tail used SETEQ alone; an unordered compare also sets ZF, so
// NaN operands stored 1 there and disagreed with the block loop.
func genEq_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataEqF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Eq_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB8_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB8_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB8_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB8_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 4-lane compares, each narrowed to four bytes, then
	// merged into one 16-byte store.
	Label("LBB8_4")
	VMOVUPD(zBlk, Y1)
	VMOVUPD(zBlk.Offset(32), Y2)
	VMOVUPD(zBlk.Offset(64), Y3)
	VMOVUPD(zBlk.Offset(96), Y4)
	VCMPPD(Imm(0), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(0), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(0), yBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(0), yBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB8_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB8_7"))

	// Scalar tail. Equality holds only when ZF=1 and the compare was ordered
	// (PF=0), so both flags are captured and ANDed. RAX and R9 are not used
	// for anything else in the tail, so AL and R9B serve as scratch.
	Label("LBB8_6")
	VMOVSD(yOne, X0)
	VUCOMISD(zOne, X0)
	SETEQ(AL)
	SETPC(R9B)
	ANDB(R9B, AL)
	MOVB(AL, xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB8_6"))

	Label("LBB8_7")
	VZEROUPPER()
	RET()
}
// genEq_F32 emits the routine Eq_AVX2_F32, which stores the byte
// x[i] = (y[i] == z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 0,
// exactly like predicate 0 (equal, ordered) used by VCMPPS in the block loop.
// The previous tail used SETEQ alone; an unordered compare also sets ZF, so
// NaN operands stored 1 there and disagreed with the block loop.
func genEq_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataEqF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Eq_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB9_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB9_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB9_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB9_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 8-lane compares, each narrowed to eight bytes, then
	// merged into one 32-byte store.
	Label("LBB9_4")
	VMOVUPS(zBlk, Y1)
	VMOVUPS(zBlk.Offset(32), Y2)
	VMOVUPS(zBlk.Offset(64), Y3)
	VMOVUPS(zBlk.Offset(96), Y4)
	VCMPPS(Imm(0), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(0), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(0), yBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(0), yBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB9_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB9_7"))

	// Scalar tail. Equality holds only when ZF=1 and the compare was ordered
	// (PF=0), so both flags are captured and ANDed. RAX and R9 are not used
	// for anything else in the tail, so AL and R9B serve as scratch.
	Label("LBB9_6")
	VMOVSS(yOne, X0)
	VUCOMISS(zOne, X0)
	SETEQ(AL)
	SETPC(R9B)
	ANDB(R9B, AL)
	MOVB(AL, xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB9_6"))

	Label("LBB9_7")
	VZEROUPPER()
	RET()
}
// genNeq_F64 emits the routine Neq_AVX2_F64, which stores the byte
// x[i] = (y[i] != z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 1,
// exactly like predicate 4 (not-equal, unordered) used by VCMPPD in the block
// loop. The previous tail used SETNE alone; an unordered compare sets ZF, so
// NaN operands stored 0 there and disagreed with the block loop.
func genNeq_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataNeqF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Neq_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 8)
		zBlk = Mem{Base: RDX}.Idx(RAX, 8)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 8)
		zOne = Mem{Base: RDX}.Idx(R8, 8)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB10_7"))
	CMPQ(RCX, Imm(16))
	JAE(LabelRef("LBB10_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB10_6"))

	// R8 = len rounded down to a multiple of 16, RAX = block index, X0 = mask.
	Label("LBB10_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-16), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 4-lane compares, each narrowed to four bytes, then
	// merged into one 16-byte store.
	Label("LBB10_4")
	VMOVUPD(zBlk, Y1)
	VMOVUPD(zBlk.Offset(32), Y2)
	VMOVUPD(zBlk.Offset(64), Y3)
	VMOVUPD(zBlk.Offset(96), Y4)
	VCMPPD(Imm(4), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(4), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(4), yBlk.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(4), yBlk.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xBlk)
	ADDQ(Imm(16), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB10_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB10_7"))

	// Scalar tail. Inequality holds when ZF=0 or the compare was unordered
	// (PF=1), so both flags are captured and ORed. RAX and R9 are not used
	// for anything else in the tail, so AL and R9B serve as scratch.
	Label("LBB10_6")
	VMOVSD(yOne, X0)
	VUCOMISD(zOne, X0)
	SETNE(AL)
	SETPS(R9B)
	ORB(R9B, AL)
	MOVB(AL, xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB10_6"))

	Label("LBB10_7")
	VZEROUPPER()
	RET()
}
// genNeq_F32 emits the routine Neq_AVX2_F32, which stores the byte
// x[i] = (y[i] != z[i]) for every i below len(x). Only len(x) is read; the
// lengths of y and z are never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
//
// The scalar tail is written so that a comparison involving NaN stores 1,
// exactly like predicate 4 (not-equal, unordered) used by VCMPPS in the block
// loop. The previous tail used SETNE alone; an unordered compare sets ZF, so
// NaN operands stored 0 there and disagreed with the block loop.
func genNeq_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataNeqF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("Neq_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	var (
		// Operands of the block loop, indexed by RAX.
		yBlk = Mem{Base: RSI}.Idx(RAX, 4)
		zBlk = Mem{Base: RDX}.Idx(RAX, 4)
		xBlk = Mem{Base: RDI}.Idx(RAX, 1)
		// Operands of the scalar tail, indexed by R8.
		yOne = Mem{Base: RSI}.Idx(R8, 4)
		zOne = Mem{Base: RDX}.Idx(R8, 4)
		xOne = Mem{Base: RDI}.Idx(R8, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB11_7"))
	CMPQ(RCX, Imm(32))
	JAE(LabelRef("LBB11_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB11_6"))

	// R8 = len rounded down to a multiple of 32, RAX = block index, X0 = mask.
	Label("LBB11_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-32), R8)
	XORL(EAX, EAX)
	VMOVDQU(mask.Offset(0), X0)

	// Block loop: four 8-lane compares, each narrowed to eight bytes, then
	// merged into one 32-byte store.
	Label("LBB11_4")
	VMOVUPS(zBlk, Y1)
	VMOVUPS(zBlk.Offset(32), Y2)
	VMOVUPS(zBlk.Offset(64), Y3)
	VMOVUPS(zBlk.Offset(96), Y4)
	VCMPPS(Imm(4), yBlk, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(4), yBlk.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(4), yBlk.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(4), yBlk.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xBlk)
	ADDQ(Imm(32), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB11_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB11_7"))

	// Scalar tail. Inequality holds when ZF=0 or the compare was unordered
	// (PF=1), so both flags are captured and ORed. RAX and R9 are not used
	// for anything else in the tail, so AL and R9B serve as scratch.
	Label("LBB11_6")
	VMOVSS(yOne, X0)
	VUCOMISS(zOne, X0)
	SETNE(AL)
	SETPS(R9B)
	ORB(R9B, AL)
	MOVB(AL, xOne)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB11_6"))

	Label("LBB11_7")
	VZEROUPPER()
	RET()
}
// genLtNumber_F64 emits the routine LtNumber_AVX2_F64, which stores the byte
// x[i] = (y[i] < a) for every i below len(x). Only len(x) is read; the length
// of y is never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
func genLtNumber_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataLtNumberF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("LtNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	var (
		// Operands of the block loop, indexed by RCX.
		yBlk = Mem{Base: RSI}.Idx(RCX, 8)
		xBlk = Mem{Base: RDI}.Idx(RCX, 1)
		// Operands of the scalar tail, indexed by RAX.
		yOne = Mem{Base: RSI}.Idx(RAX, 8)
		xOne = Mem{Base: RDI}.Idx(RAX, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB12_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB12_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB12_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = block index, X2 = mask.
	Label("LBB12_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-16), RAX)
	VBROADCASTSD(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(mask.Offset(0), X2)

	// Block loop: four 4-lane "y < a" compares (predicate 1), each narrowed to
	// four bytes, then merged into one 16-byte store.
	Label("LBB12_4")
	VMOVUPD(yBlk, Y3)
	VMOVUPD(yBlk.Offset(32), Y4)
	VMOVUPD(yBlk.Offset(64), Y5)
	VMOVUPD(yBlk.Offset(96), Y6)
	VCMPPD(Imm(1), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPD(Imm(1), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VPUNPCKLDQ(X4, X3, X3)
	VCMPPD(Imm(1), Y1, Y5, Y4)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPD(Imm(1), Y1, Y6, Y5)
	VEXTRACTF128(Imm(1), Y5, X6)
	VPACKSSDW(X6, X5, X5)
	VPACKSSDW(X5, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VPBROADCASTD(X5, X5)
	VPBROADCASTD(X4, X4)
	VPUNPCKLDQ(X5, X4, X4)
	VPBLENDD(Imm(12), X4, X3, X3)
	VMOVDQU(X3, xBlk)
	ADDQ(Imm(16), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB12_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB12_7"))

	// Scalar tail. X0 holds a and is compared against y[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. a > y[i].
	Label("LBB12_6")
	VUCOMISD(yOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB12_6"))

	Label("LBB12_7")
	VZEROUPPER()
	RET()
}
// genLtNumber_F32 emits the routine LtNumber_AVX2_F32, which stores the byte
// x[i] = (y[i] < a) for every i below len(x). Only len(x) is read; the length
// of y is never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
func genLtNumber_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataLtNumberF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("LtNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	var (
		// Operands of the block loop, indexed by RCX.
		yBlk = Mem{Base: RSI}.Idx(RCX, 4)
		xBlk = Mem{Base: RDI}.Idx(RCX, 1)
		// Operands of the scalar tail, indexed by RAX.
		yOne = Mem{Base: RSI}.Idx(RAX, 4)
		xOne = Mem{Base: RDI}.Idx(RAX, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB13_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB13_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB13_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = block index, X2 = mask.
	Label("LBB13_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-32), RAX)
	VBROADCASTSS(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(mask.Offset(0), X2)

	// Block loop: four 8-lane "y < a" compares (predicate 1), each narrowed to
	// eight bytes, then merged into one 32-byte store.
	Label("LBB13_4")
	VMOVUPS(yBlk, Y3)
	VMOVUPS(yBlk.Offset(32), Y4)
	VMOVUPS(yBlk.Offset(64), Y5)
	VMOVUPS(yBlk.Offset(96), Y6)
	VCMPPS(Imm(1), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPS(Imm(1), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPS(Imm(1), Y1, Y5, Y5)
	VEXTRACTF128(Imm(1), Y5, X7)
	VPACKSSDW(X7, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VCMPPS(Imm(1), Y1, Y6, Y6)
	VEXTRACTF128(Imm(1), Y6, X7)
	VPACKSSDW(X7, X6, X6)
	VPACKSSWB(X6, X6, X6)
	VPAND(X2, X6, X6)
	VINSERTI128(Imm(1), X6, Y5, Y5)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VPUNPCKLQDQ(Y5, Y3, Y3)
	VPERMQ(Imm(216), Y3, Y3)
	VMOVDQU(Y3, xBlk)
	ADDQ(Imm(32), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB13_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB13_7"))

	// Scalar tail. X0 holds a and is compared against y[i]; SETHI stores 1
	// only when CF=0 and ZF=0, i.e. a > y[i].
	Label("LBB13_6")
	VUCOMISS(yOne, X0)
	SETHI(xOne)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB13_6"))

	Label("LBB13_7")
	VZEROUPPER()
	RET()
}
// genLteNumber_F64 emits the routine LteNumber_AVX2_F64, which stores the
// byte x[i] = (y[i] <= a) for every i below len(x). Only len(x) is read; the
// length of y is never inspected by the generated code.
//
// Inputs of 16 or more elements are processed 16 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 16)
// is handled one element at a time.
func genLteNumber_F64() {
	// 16-byte constant with ones in the low four bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float64 lane.
	mask := GLOBL("dataLteNumberF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 4 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("LteNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	var (
		// Operands of the block loop, indexed by RCX.
		yBlk = Mem{Base: RSI}.Idx(RCX, 8)
		xBlk = Mem{Base: RDI}.Idx(RCX, 1)
		// Operands of the scalar tail, indexed by RAX.
		yOne = Mem{Base: RSI}.Idx(RAX, 8)
		xOne = Mem{Base: RDI}.Idx(RAX, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB14_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB14_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB14_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = block index, X2 = mask.
	Label("LBB14_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-16), RAX)
	VBROADCASTSD(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(mask.Offset(0), X2)

	// Block loop: four 4-lane "y <= a" compares (predicate 2), each narrowed
	// to four bytes, then merged into one 16-byte store.
	Label("LBB14_4")
	VMOVUPD(yBlk, Y3)
	VMOVUPD(yBlk.Offset(32), Y4)
	VMOVUPD(yBlk.Offset(64), Y5)
	VMOVUPD(yBlk.Offset(96), Y6)
	VCMPPD(Imm(2), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPD(Imm(2), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VPUNPCKLDQ(X4, X3, X3)
	VCMPPD(Imm(2), Y1, Y5, Y4)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPD(Imm(2), Y1, Y6, Y5)
	VEXTRACTF128(Imm(1), Y5, X6)
	VPACKSSDW(X6, X5, X5)
	VPACKSSDW(X5, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VPBROADCASTD(X5, X5)
	VPBROADCASTD(X4, X4)
	VPUNPCKLDQ(X5, X4, X4)
	VPBLENDD(Imm(12), X4, X3, X3)
	VMOVDQU(X3, xBlk)
	ADDQ(Imm(16), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB14_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB14_7"))

	// Scalar tail. X0 holds a and is compared against y[i]; SETCC stores 1
	// only when CF=0, i.e. a >= y[i].
	Label("LBB14_6")
	VUCOMISD(yOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB14_6"))

	Label("LBB14_7")
	VZEROUPPER()
	RET()
}
// genLteNumber_F32 emits the routine LteNumber_AVX2_F32, which stores the
// byte x[i] = (y[i] <= a) for every i below len(x). Only len(x) is read; the
// length of y is never inspected by the generated code.
//
// Inputs of 32 or more elements are processed 32 at a time with AVX2
// instructions; the remainder (or the whole input when it is shorter than 32)
// is handled one element at a time.
func genLteNumber_F32() {
	// 16-byte constant with ones in the low eight bytes; ANDing a packed
	// compare result with it leaves one 0/1 byte per float32 lane.
	mask := GLOBL("dataLteNumberF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		if off < 8 {
			DATA(off, U8(1))
		} else {
			DATA(off, U8(0))
		}
	}

	TEXT("LteNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	var (
		// Operands of the block loop, indexed by RCX.
		yBlk = Mem{Base: RSI}.Idx(RCX, 4)
		xBlk = Mem{Base: RDI}.Idx(RCX, 1)
		// Operands of the scalar tail, indexed by RAX.
		yOne = Mem{Base: RSI}.Idx(RAX, 4)
		xOne = Mem{Base: RDI}.Idx(RAX, 1)
	)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB15_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB15_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB15_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = block index, X2 = mask.
	Label("LBB15_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-32), RAX)
	VBROADCASTSS(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(mask.Offset(0), X2)

	// Block loop: four 8-lane "y <= a" compares (predicate 2), each narrowed
	// to eight bytes, then merged into one 32-byte store.
	Label("LBB15_4")
	VMOVUPS(yBlk, Y3)
	VMOVUPS(yBlk.Offset(32), Y4)
	VMOVUPS(yBlk.Offset(64), Y5)
	VMOVUPS(yBlk.Offset(96), Y6)
	VCMPPS(Imm(2), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPS(Imm(2), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPS(Imm(2), Y1, Y5, Y5)
	VEXTRACTF128(Imm(1), Y5, X7)
	VPACKSSDW(X7, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VCMPPS(Imm(2), Y1, Y6, Y6)
	VEXTRACTF128(Imm(1), Y6, X7)
	VPACKSSDW(X7, X6, X6)
	VPACKSSWB(X6, X6, X6)
	VPAND(X2, X6, X6)
	VINSERTI128(Imm(1), X6, Y5, Y5)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VPUNPCKLQDQ(Y5, Y3, Y3)
	VPERMQ(Imm(216), Y3, Y3)
	VMOVDQU(Y3, xBlk)
	ADDQ(Imm(32), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB15_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB15_7"))

	// Scalar tail. X0 holds a and is compared against y[i]; SETCC stores 1
	// only when CF=0, i.e. a >= y[i].
	Label("LBB15_6")
	VUCOMISS(yOne, X0)
	SETCC(xOne)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB15_6"))

	Label("LBB15_7")
	VZEROUPPER()
	RET()
}
// genGtNumber_F64 emits the AVX2 routine GtNumber_AVX2_F64, which stores
// x[i] = (y[i] > a) for every index below len(x).
//
// Inputs of 16 or more elements run through a vector loop that handles 16
// doubles per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genGtNumber_F64() {
	// 16-byte mask whose low four bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the other bytes.
	data := GLOBL("dataGtNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 4 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("GtNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB16_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB16_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB16_6"))
	Label("LBB16_3")
	{
		// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB16_4")
	{
		// Four groups of four doubles: compare a < y (predicate 1), narrow each
		// 64-bit lane mask to one byte, then mask the bytes down to 0 or 1.
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		// Combine the four 4-byte results in element order and store 16 bytes.
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB16_4"))
		// Finished when the length was an exact multiple of 16.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB16_7"))
	}
	Label("LBB16_6")
	{
		// Load y[i] into a scratch register so it is the first operand of the
		// compare: "above" (CF=0 and ZF=0) then holds only for an ordered
		// y[i] > a. Comparing a against y[i] and testing the carry flag would
		// also fire on the unordered result and report true for NaN, which
		// disagrees with the vector loop.
		VMOVSD(Mem{Base: RSI}.Idx(RAX, 8), X3)
		VUCOMISD(X0, X3)
		SETHI(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB16_6"))
	}
	Label("LBB16_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genGtNumber_F32 emits the AVX2 routine GtNumber_AVX2_F32, which stores
// x[i] = (y[i] > a) for every index below len(x).
//
// Inputs of 32 or more elements run through a vector loop that handles 32
// floats per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genGtNumber_F32() {
	// 16-byte mask whose low eight bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the upper half.
	data := GLOBL("dataGtNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 8 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("GtNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB17_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB17_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB17_6"))
	Label("LBB17_3")
	{
		// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB17_4")
	{
		// Four groups of eight floats: compare a < y (predicate 1), narrow each
		// 32-bit lane mask to one byte, then mask the bytes down to 0 or 1.
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		// Stitch the four 8-byte results together in element order and store.
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB17_4"))
		// Finished when the length was an exact multiple of 32.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB17_7"))
	}
	Label("LBB17_6")
	{
		// Load y[i] into a scratch register so it is the first operand of the
		// compare: "above" (CF=0 and ZF=0) then holds only for an ordered
		// y[i] > a. Comparing a against y[i] and testing the carry flag would
		// also fire on the unordered result and report true for NaN, which
		// disagrees with the vector loop.
		VMOVSS(Mem{Base: RSI}.Idx(RAX, 4), X3)
		VUCOMISS(X0, X3)
		SETHI(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB17_6"))
	}
	Label("LBB17_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genGteNumber_F64 emits the AVX2 routine GteNumber_AVX2_F64, which stores
// x[i] = (y[i] >= a) for every index below len(x).
//
// Inputs of 16 or more elements run through a vector loop that handles 16
// doubles per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genGteNumber_F64() {
	// 16-byte mask whose low four bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the other bytes.
	data := GLOBL("dataGteNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 4 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("GteNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB18_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB18_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB18_6"))
	Label("LBB18_3")
	{
		// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB18_4")
	{
		// Four groups of four doubles: compare a <= y (predicate 2), narrow each
		// 64-bit lane mask to one byte, then mask the bytes down to 0 or 1.
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		// Combine the four 4-byte results in element order and store 16 bytes.
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB18_4"))
		// Finished when the length was an exact multiple of 16.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB18_7"))
	}
	Label("LBB18_6")
	{
		// Load y[i] into a scratch register so it is the first operand of the
		// compare: carry clear then holds only for an ordered y[i] >= a.
		// Comparing a against y[i] and testing "below or equal" would also fire
		// on the unordered result and report true for NaN, which disagrees
		// with the vector loop.
		VMOVSD(Mem{Base: RSI}.Idx(RAX, 8), X3)
		VUCOMISD(X0, X3)
		SETCC(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB18_6"))
	}
	Label("LBB18_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genGteNumber_F32 emits the AVX2 routine GteNumber_AVX2_F32, which stores
// x[i] = (y[i] >= a) for every index below len(x).
//
// Inputs of 32 or more elements run through a vector loop that handles 32
// floats per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genGteNumber_F32() {
	// 16-byte mask whose low eight bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the upper half.
	data := GLOBL("dataGteNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 8 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("GteNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB19_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB19_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB19_6"))
	Label("LBB19_3")
	{
		// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB19_4")
	{
		// Four groups of eight floats: compare a <= y (predicate 2), narrow each
		// 32-bit lane mask to one byte, then mask the bytes down to 0 or 1.
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		// Stitch the four 8-byte results together in element order and store.
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB19_4"))
		// Finished when the length was an exact multiple of 32.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB19_7"))
	}
	Label("LBB19_6")
	{
		// Load y[i] into a scratch register so it is the first operand of the
		// compare: carry clear then holds only for an ordered y[i] >= a.
		// Comparing a against y[i] and testing "below or equal" would also fire
		// on the unordered result and report true for NaN, which disagrees
		// with the vector loop.
		VMOVSS(Mem{Base: RSI}.Idx(RAX, 4), X3)
		VUCOMISS(X0, X3)
		SETCC(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB19_6"))
	}
	Label("LBB19_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genEqNumber_F64 emits the AVX2 routine EqNumber_AVX2_F64, which stores
// x[i] = (y[i] == a) for every index below len(x).
//
// Inputs of 16 or more elements run through a vector loop that handles 16
// doubles per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genEqNumber_F64() {
	// 16-byte mask whose low four bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the other bytes.
	data := GLOBL("dataEqNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 4 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("EqNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB20_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB20_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB20_6"))
	Label("LBB20_3")
	{
		// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB20_4")
	{
		// Four groups of four doubles: ordered-equal compare (predicate 0),
		// narrow each 64-bit lane mask to one byte, then mask the bytes to 0/1.
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		// Combine the four 4-byte results in element order and store 16 bytes.
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB20_4"))
		// Finished when the length was an exact multiple of 16.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB20_7"))
	}
	Label("LBB20_6")
	{
		// The unordered (NaN) result also sets ZF, so ZF alone would report
		// NaN as equal, unlike the vector loop. Require ZF=1 together with
		// PF=0 (ordered). CL and R8B are free scratch registers here: RCX is
		// only the vector-loop counter and R8 is not used by this routine.
		VUCOMISD(Mem{Base: RSI}.Idx(RAX, 8), X0)
		SETEQ(CL)
		SETPC(R8B)
		ANDB(R8B, CL)
		MOVB(CL, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB20_6"))
	}
	Label("LBB20_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genEqNumber_F32 emits the AVX2 routine EqNumber_AVX2_F32, which stores
// x[i] = (y[i] == a) for every index below len(x).
//
// Inputs of 32 or more elements run through a vector loop that handles 32
// floats per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report false when either
// operand is NaN.
func genEqNumber_F32() {
	// 16-byte mask whose low eight bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the upper half.
	data := GLOBL("dataEqNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 8 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("EqNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB21_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB21_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB21_6"))
	Label("LBB21_3")
	{
		// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB21_4")
	{
		// Four groups of eight floats: ordered-equal compare (predicate 0),
		// narrow each 32-bit lane mask to one byte, then mask the bytes to 0/1.
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		// Stitch the four 8-byte results together in element order and store.
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB21_4"))
		// Finished when the length was an exact multiple of 32.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB21_7"))
	}
	Label("LBB21_6")
	{
		// The unordered (NaN) result also sets ZF, so ZF alone would report
		// NaN as equal, unlike the vector loop. Require ZF=1 together with
		// PF=0 (ordered). CL and R8B are free scratch registers here: RCX is
		// only the vector-loop counter and R8 is not used by this routine.
		VUCOMISS(Mem{Base: RSI}.Idx(RAX, 4), X0)
		SETEQ(CL)
		SETPC(R8B)
		ANDB(R8B, CL)
		MOVB(CL, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB21_6"))
	}
	Label("LBB21_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genNeqNumber_F64 emits the AVX2 routine NeqNumber_AVX2_F64, which stores
// x[i] = (y[i] != a) for every index below len(x).
//
// Inputs of 16 or more elements run through a vector loop that handles 16
// doubles per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report true when either
// operand is NaN.
func genNeqNumber_F64() {
	// 16-byte mask whose low four bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the other bytes.
	data := GLOBL("dataNeqNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 4 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("NeqNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB22_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB22_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB22_6"))
	Label("LBB22_3")
	{
		// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB22_4")
	{
		// Four groups of four doubles: not-equal-or-unordered compare
		// (predicate 4), narrow each 64-bit lane mask to one byte, then mask
		// the bytes down to 0 or 1.
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		// Combine the four 4-byte results in element order and store 16 bytes.
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB22_4"))
		// Finished when the length was an exact multiple of 16.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB22_7"))
	}
	Label("LBB22_6")
	{
		// The unordered (NaN) result sets ZF, so ZF=0 alone would report NaN
		// as "not different", unlike the vector loop. Accept ZF=0 or PF=1
		// (unordered). CL and R8B are free scratch registers here: RCX is only
		// the vector-loop counter and R8 is not used by this routine.
		VUCOMISD(Mem{Base: RSI}.Idx(RAX, 8), X0)
		SETNE(CL)
		SETPS(R8B)
		ORB(R8B, CL)
		MOVB(CL, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB22_6"))
	}
	Label("LBB22_7")
	{
		VZEROUPPER()
		RET()
	}
}
// genNeqNumber_F32 emits the AVX2 routine NeqNumber_AVX2_F32, which stores
// x[i] = (y[i] != a) for every index below len(x).
//
// Inputs of 32 or more elements run through a vector loop that handles 32
// floats per iteration; shorter inputs and any remainder are handled one
// element at a time by the tail loop. Both loops report true when either
// operand is NaN.
func genNeqNumber_F32() {
	// 16-byte mask whose low eight bytes are 1. ANDing it with a packed compare
	// result turns every 0xFF "true" byte into 1 and clears the upper half.
	data := GLOBL("dataNeqNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		if i < 8 {
			DATA(i, U8(1))
		} else {
			DATA(i, U8(0))
		}
	}
	TEXT("NeqNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)
	// Nothing to do for an empty destination; short inputs skip the vector loop.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB23_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB23_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB23_6"))
	Label("LBB23_3")
	{
		// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
		// RCX = running element index, X2 = byte mask.
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}
	Label("LBB23_4")
	{
		// Four groups of eight floats: not-equal-or-unordered compare
		// (predicate 4), narrow each 32-bit lane mask to one byte, then mask
		// the bytes down to 0 or 1.
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		// Stitch the four 8-byte results together in element order and store.
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB23_4"))
		// Finished when the length was an exact multiple of 32.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB23_7"))
	}
	Label("LBB23_6")
	{
		// The unordered (NaN) result sets ZF, so ZF=0 alone would report NaN
		// as "not different", unlike the vector loop. Accept ZF=0 or PF=1
		// (unordered). CL and R8B are free scratch registers here: RCX is only
		// the vector-loop counter and R8 is not used by this routine.
		VUCOMISS(Mem{Base: RSI}.Idx(RAX, 4), X0)
		SETNE(CL)
		SETPS(R8B)
		ORB(R8B, CL)
		MOVB(CL, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB23_6"))
	}
	Label("LBB23_7")
	{
		VZEROUPPER()
		RET()
	}
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。