
|
#include "go_asm.h" #include "textflag.h"
// ·Index is the entry stub for searching b inside a.
//
// NOTE(review): the frame layout (24-byte headers at offsets 0 and 24, an
// 8-byte result at offset 48) suggests the Go declaration is
// `func Index(a, b []byte) int` — confirm against the Go stub.
//
// It only marshals arguments into the registers that indexbody<> reads and
// then tail-jumps there; indexbody<> writes the result through R11 and
// returns directly to our caller.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVQ	b_len+32(FP), AX	// AX  = length of b (what we search for)
	MOVQ	b_base+24(FP), R8	// R8  = pointer to b
	MOVQ	a_len+8(FP), DX		// DX  = length of a (where we search)
	MOVQ	a_base+0(FP), DI	// DI  = pointer to a; advanced during the scan
	LEAQ	ret+48(FP), R11		// R11 = address of the result slot
	MOVQ	DI, R10			// R10 = untouched copy of the start of a
	JMP	indexbody<>(SB)
// ·IndexString is the entry stub for searching b inside a when both
// arguments use 16-byte (pointer, length) headers.
//
// NOTE(review): the frame layout (headers at offsets 0 and 16, an 8-byte
// result at offset 32) suggests the Go declaration is
// `func IndexString(a, b string) int` — confirm against the Go stub.
//
// It only marshals arguments into the registers that indexbody<> reads and
// then tail-jumps there; indexbody<> writes the result through R11 and
// returns directly to our caller.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVQ	b_len+24(FP), AX	// AX  = length of b (what we search for)
	MOVQ	b_base+16(FP), R8	// R8  = pointer to b
	MOVQ	a_len+8(FP), DX		// DX  = length of a (where we search)
	MOVQ	a_base+0(FP), DI	// DI  = pointer to a; advanced during the scan
	LEAQ	ret+32(FP), R11		// R11 = address of the result slot
	MOVQ	DI, R10			// R10 = untouched copy of the start of a
	JMP	indexbody<>(SB)
// indexbody<> locates the first occurrence of a pattern inside a text and
// stores the byte offset of the match, or -1 when there is none, through R11.
//
// Registers on entry:
//	AX  = pattern length
//	DX  = text length
//	DI  = pointer to the text (used as the scan cursor)
//	R8  = pointer to the pattern
//	R10 = copy of the original text pointer (cursor - R10 = result offset)
//	R11 = address where the result is written
// Scratch: BX, CX, SI, R9, X0-X3, Y0-Y4.
//
// The lengths are kept in AX and DX because PCMPESTRI takes its two operand
// lengths implicitly from those registers.
//
// Strategy: dispatch on the pattern length. Lengths that equal a register
// width (2, 4, 8, 16, 32) use a single compare per text position; the lengths
// in between use two overlapping compares, one over the first bytes of the
// pattern and one over its last bytes. In every loop DX is rewritten to
// "one past the last text position at which the pattern still fits".
//
// NOTE(review): the shortest path always loads two pattern bytes, so a
// pattern length of at least 2 is assumed — confirm the caller guarantees it.
TEXT indexbody<>(SB),NOSPLIT,$0
	CMPQ	AX, DX
	JA	fail			// pattern longer than text
	CMPQ	DX, $16
	JAE	sse42			// text holds at least one 16-byte window

no_sse42:
	// Pattern length <= 2: one 16-bit compare per position.
	CMPQ	AX, $2
	JA	_3_or_more
	MOVW	(R8), R8		// R8 now holds the pattern bytes themselves
	LEAQ	-1(DI)(DX*1), DX
loop2:
	MOVW	(DI), SI
	CMPW	SI, R8
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop2
	JMP	fail

_3_or_more:
	// Pattern length 3: 16-bit compares of bytes 0-1 and bytes 1-2.
	CMPQ	AX, $3
	JA	_4_or_more
	MOVW	1(R8), BX		// tail word; must be read before R8 is overwritten
	MOVW	(R8), R8		// head word
	LEAQ	-2(DI)(DX*1), DX
loop3:
	MOVW	(DI), SI
	CMPW	SI, R8
	JEQ	partial_success3
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop3
	JMP	fail
partial_success3:
	// Head matched; confirm with the overlapping tail word.
	MOVW	1(DI), SI
	CMPW	SI, BX
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop3
	JMP	fail

_4_or_more:
	// Pattern length 4: one 32-bit compare per position.
	CMPQ	AX, $4
	JA	_5_or_more
	MOVL	(R8), R8
	LEAQ	-3(DI)(DX*1), DX
loop4:
	MOVL	(DI), SI
	CMPL	SI, R8
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop4
	JMP	fail

_5_or_more:
	// Pattern length 5..7: 32-bit compares of the first and the last 4 bytes.
	CMPQ	AX, $7
	JA	_8_or_more
	LEAQ	1(DI)(DX*1), DX
	SUBQ	AX, DX			// DX = text end - pattern length + 1
	MOVL	-4(R8)(AX*1), BX	// last 4 pattern bytes
	MOVL	(R8), R8		// first 4 pattern bytes
loop5to7:
	MOVL	(DI), SI
	CMPL	SI, R8
	JEQ	partial_success5to7
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop5to7
	JMP	fail
partial_success5to7:
	MOVL	-4(AX)(DI*1), SI	// last 4 bytes of the candidate
	CMPL	SI, BX
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop5to7
	JMP	fail

_8_or_more:
	// Pattern length 8: one 64-bit compare per position.
	CMPQ	AX, $8
	JA	_9_or_more
	MOVQ	(R8), R8
	LEAQ	-7(DI)(DX*1), DX
loop8:
	MOVQ	(DI), SI
	CMPQ	SI, R8
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop8
	JMP	fail

_9_or_more:
	// Pattern length 9..15: 64-bit compares of the first and the last 8 bytes.
	// Also the landing point for the sse42 path when the pattern has 12+ bytes.
	CMPQ	AX, $15
	JA	_16_or_more
	LEAQ	1(DI)(DX*1), DX
	SUBQ	AX, DX			// DX = text end - pattern length + 1
	MOVQ	-8(R8)(AX*1), BX	// last 8 pattern bytes
	MOVQ	(R8), R8		// first 8 pattern bytes
loop9to15:
	MOVQ	(DI), SI
	CMPQ	SI, R8
	JEQ	partial_success9to15
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop9to15
	JMP	fail
partial_success9to15:
	MOVQ	-8(AX)(DI*1), SI	// last 8 bytes of the candidate
	CMPQ	SI, BX
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop9to15
	JMP	fail

_16_or_more:
	// Pattern length 16: one 128-bit byte-wise compare per position.
	CMPQ	AX, $16
	JA	_17_or_more
	MOVOU	(R8), X1
	LEAQ	-15(DI)(DX*1), DX
loop16:
	MOVOU	(DI), X2
	PCMPEQB	X1, X2
	PMOVMSKB	X2, SI
	CMPQ	SI, $0xffff		// all 16 byte lanes equal?
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop16
	JMP	fail

_17_or_more:
	// Pattern length 17..31: 128-bit compares of the first and last 16 bytes.
	CMPQ	AX, $31
	JA	_32_or_more
	LEAQ	1(DI)(DX*1), DX
	SUBQ	AX, DX			// DX = text end - pattern length + 1
	MOVOU	-16(R8)(AX*1), X0	// last 16 pattern bytes
	MOVOU	(R8), X1		// first 16 pattern bytes
loop17to31:
	MOVOU	(DI), X2
	PCMPEQB	X1, X2
	PMOVMSKB	X2, SI
	CMPQ	SI, $0xffff
	JEQ	partial_success17to31
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop17to31
	JMP	fail
partial_success17to31:
	MOVOU	-16(AX)(DI*1), X3	// last 16 bytes of the candidate
	PCMPEQB	X0, X3
	PMOVMSKB	X3, SI
	CMPQ	SI, $0xffff
	JEQ	success
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop17to31
	JMP	fail

	// NOTE(review): everything from here to fail_avx2 executes AVX2
	// instructions with no CPU feature test in this routine. Presumably the
	// caller only passes patterns of 32+ bytes when AVX2 is available — confirm.
_32_or_more:
	// Pattern length 32: one 256-bit byte-wise compare per position.
	CMPQ	AX, $32
	JA	_33_to_63
	VMOVDQU	(R8), Y1
	LEAQ	-31(DI)(DX*1), DX
loop32:
	VMOVDQU	(DI), Y2
	VPCMPEQB	Y1, Y2, Y3
	VPMOVMSKB	Y3, SI
	CMPL	SI, $0xffffffff		// all 32 byte lanes equal?
	JEQ	success_avx2
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop32
	JMP	fail_avx2

_33_to_63:
	// Pattern length 33 and up: 256-bit compares of the first and last 32
	// bytes. Two 32-byte windows cannot cover more than 64 bytes.
	// NOTE(review): AX is not bounded above here; the label name implies the
	// caller caps the pattern at 63 bytes — confirm.
	LEAQ	1(DI)(DX*1), DX
	SUBQ	AX, DX			// DX = text end - pattern length + 1
	VMOVDQU	-32(R8)(AX*1), Y0	// last 32 pattern bytes
	VMOVDQU	(R8), Y1		// first 32 pattern bytes
loop33to63:
	VMOVDQU	(DI), Y2
	VPCMPEQB	Y1, Y2, Y3
	VPMOVMSKB	Y3, SI
	CMPL	SI, $0xffffffff
	JEQ	partial_success33to63
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop33to63
	JMP	fail_avx2
partial_success33to63:
	VMOVDQU	-32(AX)(DI*1), Y3	// last 32 bytes of the candidate
	VPCMPEQB	Y0, Y3, Y4
	VPMOVMSKB	Y4, SI
	CMPL	SI, $0xffffffff
	JEQ	success_avx2
	ADDQ	$1, DI
	CMPQ	DI, DX
	JB	loop33to63
	// Text exhausted: fall through into the AVX2 failure exit.

fail_avx2:
	VZEROUPPER			// clear upper YMM halves before leaving AVX code
fail:
	MOVQ	$-1, (R11)		// result = -1: not found
	RET

success_avx2:
	VZEROUPPER			// clear upper YMM halves before leaving AVX code
	JMP	success

sse42:
	// Reached when the text has at least 16 bytes. Fall back to the plain
	// compare code unless the SSE4.2 feature flag is set.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
	JNE	no_sse42
	// Patterns of 12+ bytes go to the plain compare code instead.
	// NOTE(review): the threshold looks like a tuning choice (the loop below
	// advances 16-len bytes per PCMPESTRI, i.e. at least 5 here) — confirm.
	CMPQ	AX, $12
	JAE	_9_or_more
	// The pattern is shorter than 16 bytes but is loaded with a 16-byte
	// MOVOU below. The test is zero exactly when bits 4..11 of R8+16 are
	// all clear, i.e. R8 lies in the last 16 bytes of a 4 KiB-aligned
	// region; in that case skip the wide load.
	// NOTE(review): presumably this avoids reading across a page boundary
	// past the end of the pattern — confirm.
	LEAQ	16(R8), SI
	TESTW	$0xff0, SI
	JEQ	no_sse42
	MOVOU	(R8), X1		// X1 = pattern (only the first AX bytes are significant)
	LEAQ	-15(DI)(DX*1), SI	// SI = one past the last full 16-byte window start
	MOVQ	$16, R9
	SUBQ	AX, R9			// R9 = 16 - pattern length = stride per iteration
loop_sse42:
	// imm8 0x0c: unsigned bytes, "equal ordered" (substring) aggregation,
	// positive polarity, least-significant index returned in CX.
	PCMPESTRI	$0x0c, (DI), X1
	// CX <= R9: the whole pattern fits inside this window at offset CX.
	// CX >  R9: no match (CX = 16) or only a partial match at the window's
	// tail; stepping by R9 keeps such a partial match inside the next window.
	CMPQ	CX, R9
	JBE	sse42_success
	ADDQ	R9, DI
	CMPQ	DI, SI
	JB	loop_sse42
	// Final probe over the last 16 bytes of the text (SI-1 = text end - 16).
	PCMPESTRI	$0x0c, -1(SI), X1
	CMPQ	CX, R9
	JA	fail
	LEAQ	-1(SI), DI		// cursor = start of that final window
sse42_success:
	ADDQ	CX, DI			// cursor = address of the match
success:
	SUBQ	R10, DI			// convert the match address into an offset
	MOVQ	DI, (R11)
	RET
|