mattermost-community-enterp.../vendor/github.com/minio/minlz/asm_amd64.s
Claude ec1f89217a Merge: Complete Mattermost Server with Community Enterprise
Full Mattermost server source with integrated Community Enterprise features.
Includes vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online):
  go build ./cmd/mattermost

Build (offline/air-gapped):
  go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 23:59:07 +09:00

20792 lines
512 KiB
ArmAsm

// Code generated by command: go run gen.go -out ../asm_amd64.s -stubs ../asm_amd64.go -pkg=minlz. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !purego
#include "textflag.h"
// func _dummy_()
// The body is a single RET. The preprocessor lines between TEXT and RET
// define GOAMD64_v3 whenever GOAMD64_v4 is defined, so the GOAMD64_v3-guarded
// instruction choices later in this file are also selected for v4 builds.
TEXT ·_dummy_(SB), $0
#ifdef GOAMD64_v4
#ifndef GOAMD64_v3
#define GOAMD64_v3
#endif
#endif
RET
// func encodeBlockAsm(dst []byte, src []byte, tmp *[131072]byte) int
// Requires: BMI, CMOV, SSE2
//
// Encodes src into dst and returns the number of bytes written to dst.
// Returns 0 instead whenever one of the dst size checks finds the write
// position (plus any pending literals) at or beyond the limit kept in (SP).
//
// tmp is zeroed on entry and then used as a table of 32-bit src positions,
// indexed by 15-bit hashes (SHRQ $0x31) of the input bytes.
//
// Frame slots:
//   (SP)   dst limit pointer: dst_base + src_len - 17 - src_len>>5
//   8(SP)  search limit: src_len - 17
//   12(SP) src position where the not yet emitted literals start
//   16(SP) current repeat offset
//   20(SP) position the search resumes from when no candidate matches
//
// Registers up to emit_remainder: AX = tmp, BX = src base, CX = dst write
// pointer, DX = current src position (called s in the comments below).
TEXT ·encodeBlockAsm(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Clear tmp: 0x400 iterations of 128 bytes each = 131072 bytes.
MOVQ $0x00000400, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm
// Literal start = 0.
MOVL $0x00000000, 12(SP)
// Search limit = src_len - 17; dst limit = dst_base + src_len - 17 - src_len>>5.
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
// Start at s = 1 with a repeat offset of 1.
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm:
// Next position = s + 4 + (s - literal start)>>6. Leave the search once it
// reaches the search limit.
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm
// DI = 8 src bytes at s. R8 = s - 2162685; candidates at or below R8 are
// rejected by the signed compares after no_repeat_found.
MOVQ (BX)(DX*1), DI
LEAL -2162685(DX), R8
MOVL SI, 20(SP)
// R11 = hash of the bytes at s, R12 = hash of the bytes at s+1. SHLQ $0x10
// drops the two highest loaded bytes before the multiply; SHRQ $0x31 keeps
// the top 15 bits of the product.
MOVQ $0x0000cf1bbcdcbf9b, R10
MOVQ DI, R11
MOVQ DI, R12
SHRQ $0x08, R12
SHLQ $0x10, R11
IMULQ R10, R11
SHRQ $0x31, R11
SHLQ $0x10, R12
IMULQ R10, R12
SHRQ $0x31, R12
// Load both candidates (SI, R9), then store s in both table slots.
MOVL (AX)(R11*4), SI
MOVL (AX)(R12*4), R9
MOVL DX, (AX)(R11*4)
MOVL DX, (AX)(R12*4)
// R11 = hash of the bytes at s+2.
MOVQ DI, R11
SHRQ $0x10, R11
SHLQ $0x10, R11
IMULQ R10, R11
SHRQ $0x31, R11
// Repeat check: compare the 4 bytes at s+1 with the 4 bytes at
// s+1 - repeat offset.
MOVL DX, R10
SUBL 16(SP), R10
MOVL 1(BX)(R10*1), R12
MOVQ DI, R10
SHRQ $0x08, R10
CMPL R10, R12
JNE no_repeat_found_encodeBlockAsm
// Repeat found. DI = s+1 (match start), SI = literal start,
// R8 = s+1 - repeat offset (match source).
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeBlockAsm
// Extend the repeat backwards while DI is above the literal start, the
// source position is non-zero and the preceding bytes are equal.
repeat_extend_back_loop_encodeBlockAsm:
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm
repeat_extend_back_end_encodeBlockAsm:
// SI = number of pending literals, R8 = literal start. Return 0 unless
// CX + 4 + SI is below the dst limit.
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
LEAQ 4(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
// R8 = pointer to the literals in src.
LEAQ (BX)(R8*1), R8
// emitLiteral
// With n = length-1: n < 29 uses a single tag byte n<<3. Otherwise n-29 is
// stored in 1, 2 or 3 bytes following tag 0xe8, 0xf0 or 0xf8.
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm
CMPL R9, $0x00010000
JB three_bytes_repeat_emit_lits_encodeBlockAsm
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm
three_bytes_repeat_emit_lits_encodeBlockAsm:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm
two_bytes_repeat_emit_lits_encodeBlockAsm:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
// R9 is length-1 again here; below 0x40 takes the short copy.
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm
JMP memmove_long_repeat_emit_lits_encodeBlockAsm
one_byte_repeat_emit_lits_encodeBlockAsm:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
// R9 = dst pointer after the literals; it becomes CX once the copy is done.
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_8through16:
// Lengths up to 16 are handled with one full 16-byte load and store.
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_17through32:
// First 16 and last 16 bytes; the two stores may overlap.
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm_memmove_move_33through64:
// First 32 and last 32 bytes; the two halves may overlap.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm
memmove_midrepeat_emit_lits_encodeBlockAsm:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm
memmove_long_repeat_emit_lits_encodeBlockAsm:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
// The first and last 32 bytes are held in X0-X3 and stored last with
// unaligned moves. The middle is copied 32 bytes at a time using MOVOA
// stores; R12 starts at 64 - (CX & 31), so -32(CX)(R12*1) is a 32-byte
// aligned dst address.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsmlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm:
// Skip to s+5: the repeat was verified for the 4 bytes starting at s+1.
// R8 = bytes left in src, R9 = pointer at the new s, SI = pointer at
// s - repeat offset.
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes between (R9) and (SI), limited by R8. It compares
// 16 bytes per round, then 8, 4, 2 and 1; on a mismatch the lowest set bit
// of the XOR divided by 8 gives the number of equal bytes in that word.
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm
matchlen_loopback_16_repeat_extend_encodeBlockAsm:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm
JMP matchlen_match8_repeat_extend_encodeBlockAsm
matchlen_bsf_16repeat_extend_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_match8_repeat_extend_encodeBlockAsm:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm
matchlen_bsf_8_repeat_extend_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_match4_repeat_extend_encodeBlockAsm:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm:
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm
JB repeat_extend_forward_end_encodeBlockAsm
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm
matchlen_match1_repeat_extend_encodeBlockAsm:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm:
// s += matched bytes; SI = s - repeat start = total repeat length.
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
// NOTE(review): the value loaded into DI here is overwritten by the LEAL
// that follows without being read.
MOVL 16(SP), DI
// emitRepeat
// Length <= 29: single tag byte length*8 - 4. Otherwise length-30 is stored
// after tag 0xec (1 byte), 0xf4 (2 bytes) or 0xfc (3 bytes; the MOVL writes
// 4 bytes but CX only advances past 3 of them).
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_match_repeat_encodeBlockAsm:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_match_repeat_encodeBlockAsm:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_one_match_repeat_encodeBlockAsm:
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm:
// Everything up to s has been emitted.
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm
no_repeat_found_encodeBlockAsm:
// Candidate for s: skipped when SI <= R8 (signed), otherwise accepted when
// its 4 bytes equal the 4 bytes at s.
CMPL SI, R8
JLE offset_ok_0_encodeBlockAsm
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm
offset_ok_0_encodeBlockAsm:
// DI now holds the bytes at s+1. SI = table entry for the s+2 hash,
// R10 = s+2. Test the s+1 candidate (R9).
SHRQ $0x08, DI
MOVL (AX)(R11*4), SI
LEAL 2(DX), R10
CMPL R9, R8
JLE offset_ok_1_encodeBlockAsm
CMPL (BX)(R9*1), DI
JEQ candidate2_match_encodeBlockAsm
offset_ok_1_encodeBlockAsm:
// Store s+2 in the table, then test the s+2 candidate against the bytes
// at s+2.
MOVL R10, (AX)(R11*4)
SHRQ $0x08, DI
CMPL SI, R8
JLE offset_ok_2_encodeBlockAsm
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm
offset_ok_2_encodeBlockAsm:
// No match: continue from the position saved in 20(SP).
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm
candidate3_match_encodeBlockAsm:
// Match at s+2.
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm
candidate2_match_encodeBlockAsm:
// Match at s+1: still record s+2 in the table, then s++ and SI = candidate.
MOVL R10, (AX)(R11*4)
INCL DX
MOVL R9, SI
candidate_match_encodeBlockAsm:
// DX = match position, SI = candidate position. Extend backwards while DX
// is above the literal start, SI is non-zero and the preceding bytes are
// equal.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm
match_extend_back_loop_encodeBlockAsm:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm
JMP match_extend_back_loop_encodeBlockAsm
match_extend_back_end_encodeBlockAsm:
// Return 0 unless CX is below the dst limit.
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
// R8 = match start. Store the new repeat offset (match start - candidate),
// then step both positions past the 4 verified bytes. DI = bytes left in
// src, R9/SI = pointers to compare.
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes between (R9) and (SI), limited by DI.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm
matchlen_loopback_16_match_nolit_encodeBlockAsm:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm
JMP matchlen_match8_match_nolit_encodeBlockAsm
matchlen_bsf_16match_nolit_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm
matchlen_match8_match_nolit_encodeBlockAsm:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm
matchlen_bsf_8_match_nolit_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm
matchlen_match4_match_nolit_encodeBlockAsm:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm
JB match_nolit_end_encodeBlockAsm
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm
matchlen_match1_match_nolit_encodeBlockAsm:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm:
// s = end of match; R11 = total match length (extension + 4); SI = offset.
// DI = previous literal start, and the literal start is moved to s.
// R8 = match start - previous literal start = number of pending literals.
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm
// DI = pointer to the literals. With more than 3 literals, or an offset
// below 64, the literals are emitted separately; otherwise the 1-3 literals
// are carried inside the copy operation.
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm
// DI = 4 bytes loaded from the literal position.
MOVL (DI), DI
CMPL SI, $0x0001003f
JBE match_emit_copy2lits_encodeBlockAsm
// emitCopy3
// R11 = length - 4. SI = (offset - 65536)<<11 + 7 + literals*8.
// Length-4 up to 60 is stored in bits 5 and up; longer lengths use
// 0x7a0/0x7c0/0x7e0 followed by length-64 in 1, 2 or 3 bytes.
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
LEAL 7(SI)(R8*8), SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBlockAsm
LEAL -60(R11), R9
CMPL R11, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBlockAsm
CMPL R11, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBlockAsm
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBlockAsm
emit_copy3_2_match_emit_lits_encodeBlockAsm:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBlockAsm
emit_copy3_1_match_emit_lits_encodeBlockAsm:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBlockAsm
emit_copy3_0_match_emit_lits_encodeBlockAsm:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBlockAsm:
// Write the 4 loaded literal bytes but advance CX only by the literal count.
MOVL DI, (CX)
ADDQ R8, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
match_emit_copy2lits_encodeBlockAsm:
// emitCopy2WithLits
// offset-64 goes to bytes 1-2. The tag carries min(length-4, 7) and
// literals-1: tag = 3 + 8*((literals-1) + 4*min(length-4, 7)).
// When length-4 >= 7, R9 = length-11 is left for a following repeat.
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
// Write the 4 loaded literal bytes but advance CX only by the literal count.
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm
// emitRepeat
// Emits the remaining R9 bytes of the match as a repeat.
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_emit_repeat_copy2_encodeBlockAsm:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_emit_repeat_copy2_encodeBlockAsm:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_one_match_emit_repeat_copy2_encodeBlockAsm:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
match_emit_lits_copy_encodeBlockAsm:
// Literals are emitted on their own: return 0 unless CX + 4 + R8 is below
// the dst limit.
LEAQ 4(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
// R8 = literal count, DI = pointer to the literals. Same tag layout as the
// emitLiteral in the repeat path above.
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm
CMPL R9, $0x00010000
JB three_bytes_match_emit_encodeBlockAsm
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm
three_bytes_match_emit_encodeBlockAsm:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm
two_bytes_match_emit_encodeBlockAsm:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm
JMP memmove_long_match_emit_encodeBlockAsm
one_byte_match_emit_encodeBlockAsm:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
// R9 = dst pointer after the literals; it becomes CX once the copy is done.
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
// Lengths up to 16 are handled with one full 16-byte load and store.
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm
memmove_midmatch_emit_encodeBlockAsm:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_mid_match_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm
memmove_long_match_emit_encodeBlockAsm:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
// Same scheme as the long copy in the repeat path: head and tail are kept
// in X0-X3 and stored last, the middle uses 32-byte aligned MOVOA stores.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
match_nolits_copy_encodeBlockAsm:
// emitCopy
// SI = offset, R11 = match length. Offsets above 0x1003f use emitCopy3,
// offsets up to 0x400 use the short forms at two_byte_offset, the rest use
// emitCopy2.
CMPL SI, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBlockAsm
// emitCopy3
// R11 = length - 4; SI = (offset - 65536)<<11 + 7.
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
ADDL $0x07, SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_nolit_encodeBlockAsm_emit3
LEAL -60(R11), DI
CMPL R11, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBlockAsm_emit3
CMPL R11, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBlockAsm_emit3
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL DI, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy3_2_match_nolit_encodeBlockAsm_emit3:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW DI, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy3_1_match_nolit_encodeBlockAsm_emit3:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB DI, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy3_0_match_nolit_encodeBlockAsm_emit3:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
two_byte_offset_match_nolit_encodeBlockAsm:
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm
// Offset <= 0x400. Length below 19: 2 bytes, (offset-1)<<6 + length*4 - 15.
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_one_longer_match_nolit_encodeBlockAsm:
// Length below 0x112: (offset-1)<<6 + 61 followed by one byte length-18.
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy1_repeat_match_nolit_encodeBlockAsm:
// Longer: 2 bytes (offset-1)<<6 + 57, then the remaining length-18 is
// emitted as a repeat.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
two_byte_match_nolit_encodeBlockAsm:
// emitCopy2
// offset-64 goes to bytes 1-2. Length-4 up to 60: tag = 2 + 4*(length-4).
// Otherwise tag 0xf6/0xfa/0xfe followed by length-64 in 1, 2 or 3 bytes.
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy2_2_match_nolit_encodeBlockAsm_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy2_1_match_nolit_encodeBlockAsm_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy2_0_match_nolit_encodeBlockAsm_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm:
// After a match: stop searching if s reached the search limit, and return 0
// if CX reached the dst limit. DI = 8 src bytes at s-2.
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm
MOVQ -2(BX)(DX*1), DI
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm:
// R8 = hash of the bytes at s-2, R9 = hash of the bytes at s (DI is shifted
// down to the bytes at s). SI = previous table entry for s; then s-2 and s
// are recorded. R8 = s and DX = s+1 afterwards.
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x10, R8
IMULQ SI, R8
SHRQ $0x31, R8
SHLQ $0x10, R9
IMULQ SI, R9
SHRQ $0x31, R9
LEAL -2(DX), R10
MOVL (AX)(R9*4), SI
MOVL R10, (AX)(R8*4)
MOVL DX, (AX)(R9*4)
MOVL DX, R8
INCL DX
// Only consider the candidate when it is above s - 2162687 (unsigned).
LEAL -2162687(R8), R9
CMPL SI, R9
JA match_nolit_len_okencodeBlockAsm
JMP search_loop_encodeBlockAsm
match_nolit_len_okencodeBlockAsm:
// Immediate match at s with no literals if the 4 bytes are equal.
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm
// New repeat offset = s - candidate.
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
// DX was s+1, so +3 puts it at s+4; the candidate moves past the same 4
// bytes. DI = bytes left in src, R8/SI = pointers to compare.
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes between (R8) and (SI), limited by DI.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm
matchlen_loopback_16_match_nolit2_encodeBlockAsm:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm
JMP matchlen_match8_match_nolit2_encodeBlockAsm
matchlen_bsf_16match_nolit2_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm
matchlen_match8_match_nolit2_encodeBlockAsm:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm
matchlen_bsf_8_match_nolit2_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm
matchlen_match4_match_nolit2_encodeBlockAsm:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm
JB match_nolit2_end_encodeBlockAsm
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm
matchlen_match1_match_nolit2_encodeBlockAsm:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm:
// s = end of match, R11 = total length, literal start = s, SI = offset;
// emit through the common copy path.
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm
emit_remainder_encodeBlockAsm:
// AX = src_len - literal start = bytes still to emit as literals (nothing
// to do when zero). From here on AX, BX and DX no longer hold tmp, src base
// and s: DX becomes the pointer to the literals.
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm
LEAQ (BX)(DX*1), DX
// Return 0 unless CX + 4 + AX is below the dst limit.
LEAQ 4(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
// Same tag layout as the other emitLiteral blocks; the copies below are
// generated with margin 0 and therefore use exact-size moves for short
// lengths.
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm
CMPL BX, $0x00010000
JB three_bytes_emit_remainder_encodeBlockAsm
MOVL BX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW BX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm
three_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm
two_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm
JMP memmove_long_emit_remainder_encodeBlockAsm
one_byte_emit_remainder_encodeBlockAsm:
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
// BX = dst pointer after the literals; it becomes CX once the copy is done.
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
// First and last byte (the same byte when the length is 1).
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through8:
// First 4 and last 4 bytes; the two stores may overlap.
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
// First 8 and last 8 bytes; the two stores may overlap.
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm
memmove_midemit_remainder_encodeBlockAsm:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm
memmove_long_emit_remainder_encodeBlockAsm:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
// Same scheme as the other long copies: head and tail are kept in X0-X3 and
// stored last, the middle uses 32-byte aligned MOVOA stores.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm:
// Return the number of bytes written: CX - dst_base.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm2MB(dst []byte, src []byte, tmp *[131072]byte) int
// Requires: BMI, CMOV, SSE2
//
// Compresses src into dst and returns the number of bytes written, or 0 when
// the output would reach the internal dst limit computed in the prologue.
// tmp is used as a hash table of 32768 uint32 src positions (15-bit hashes);
// it is zeroed on entry.
//
// Register roles for most of the function:
//   AX = tmp (hash table base)        BX = src base
//   CX = dst write pointer            DX = s, the current src position
// Stack slots:
//   (SP)   = dst limit pointer
//   8(SP)  = src_len-17, the search limit for s
//   12(SP) = nextEmit, start of the literals that have not been emitted yet
//   16(SP) = current repeat offset (starts at 1)
//   20(SP) = next search position to use when nothing matches at s
TEXT ·encodeBlockAsm2MB(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Zero the table: 0x400 iterations of 128 bytes = 131072 bytes.
MOVQ $0x00000400, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm2MB:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm2MB
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
// Search limit = src_len-17; dst limit = dst + (src_len-17) - (src_len>>5).
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
// s = 1 and repeat offset = 1.
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm2MB:
// Next position = s + 4 + ((s - nextEmit) >> 6): the step grows with the
// number of pending literals. Stop searching once it reaches the limit.
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm2MB
// DI = 8 bytes at src[s]; remember the next position in 20(SP).
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
// Hash: shifting left by 16 drops the top two bytes so 6 bytes are hashed;
// after the multiply the top 15 bits (>> 49) form the table index.
// R10 = hash of the bytes at s, R11 = hash of the bytes at s+1.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x31, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x31, R11
// SI and R8 = the two candidates; then record s in both slots.
// NOTE(review): the second slot is indexed by the hash of the bytes at s+1
// but also receives s (not s+1) - confirm against the generator that this is
// intended.
MOVL (AX)(R10*4), SI
MOVL (AX)(R11*4), R8
MOVL DX, (AX)(R10*4)
MOVL DX, (AX)(R11*4)
// R10 = hash of the bytes at s+2 (used further down for a third candidate).
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x31, R10
// Repeat check: compare 4 bytes at src[s+1-repeat] with the 4 bytes at s+1.
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm2MB
// Repeat found at s+1. DI = match start, SI = nextEmit, R8 = DI - repeat.
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeBlockAsm2MB
// Extend the match backwards while the preceding bytes agree, without going
// below nextEmit and without the candidate index going below 0.
repeat_extend_back_loop_encodeBlockAsm2MB:
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm2MB
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm2MB
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm2MB
repeat_extend_back_end_encodeBlockAsm2MB:
// SI = literal count (match start - nextEmit). Return 0 if
// dst + 4 + count would reach the dst limit.
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
LEAQ 4(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
// R8 = pointer to the first literal byte.
LEAQ (BX)(R8*1), R8
// emitLiteral
// Header selection on count-1 (R9):
//   < 29      : one byte, (count-1)<<3
//   else subtract 29, then
//   < 256     : 0xe8 + 1 length byte
//   < 65536   : 0xf0 + 2 length bytes
//   otherwise : 0xf8 + 3 length bytes
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm2MB
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm2MB
CMPL R9, $0x00010000
JB three_bytes_repeat_emit_lits_encodeBlockAsm2MB
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm2MB
three_bytes_repeat_emit_lits_encodeBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm2MB
two_bytes_repeat_emit_lits_encodeBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
// R9 is count-1 again; below 64 use the short copy, otherwise the long one.
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm2MB
JMP memmove_long_repeat_emit_lits_encodeBlockAsm2MB
one_byte_repeat_emit_lits_encodeBlockAsm2MB:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
// R9 = dst pointer after the literals.
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
// Counts up to 16 are copied as one full 16-byte load/store, so up to 15
// bytes beyond the count are read from src and written to dst.
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm2MB
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_17through32:
// First 16 and last 16 bytes; the two windows may overlap.
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm2MB
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_33through64:
// First 32 and last 32 bytes; the two windows may overlap.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm2MB:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm2MB
memmove_midrepeat_emit_lits_encodeBlockAsm2MB:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm2MB
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm2MB:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm2MB
memmove_long_repeat_emit_lits_encodeBlockAsm2MB:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
// X0-X3 keep the first and last 32 source bytes; they are stored last with
// unaligned moves. R11 = count/32. R12 = 64 - (dst & 31), so dst+R12-32 is
// 32-byte aligned and the MOVOA stores below hit aligned addresses.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm2MBlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm2MBlarge_forward_sse_loop_32:
// Copy 32 bytes per iteration while the running offset R12 <= count.
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm2MB:
// Skip s+1 plus the 4 bytes already compared, then extend forward.
// R8 = bytes left in src, R9 = &src[s], SI = &src[s - repeat].
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes: 16 at a time, then 8, 4, 2, 1. On a mismatch in a
// 64-bit compare, the trailing zero count of the XOR divided by 8 gives the
// number of equal leading bytes.
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm2MB
matchlen_loopback_16_repeat_extend_encodeBlockAsm2MB:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm2MB
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm2MB
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm2MB:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm2MB
JMP matchlen_match8_repeat_extend_encodeBlockAsm2MB
matchlen_bsf_16repeat_extend_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm2MB
matchlen_match8_repeat_extend_encodeBlockAsm2MB:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm2MB
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm2MB
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm2MB
matchlen_bsf_8_repeat_extend_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm2MB
matchlen_match4_repeat_extend_encodeBlockAsm2MB:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm2MB
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm2MB
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm2MB:
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm2MB
JB repeat_extend_forward_end_encodeBlockAsm2MB
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm2MB
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm2MB
matchlen_match1_repeat_extend_encodeBlockAsm2MB:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm2MB
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm2MB:
// s += extension; SI = total match length (s - match start).
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
// NOTE(review): this load is dead - DI is overwritten by the next LEAL.
MOVL 16(SP), DI
// emitRepeat
// Length SI:
//   <= 29       : one byte, length*8 - 4
//   else DI = length-30, then
//   < 0x11e     : 0xec + 1 length byte
//   < 0x1001e   : 0xf4 + 2 length bytes
//   otherwise   : 0xfc + 3 length bytes (MOVL stores 4 bytes, but CX only
//                 advances past 3 of them)
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm2MB
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm2MB
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm2MB
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm2MB
repeat_three_match_repeat_encodeBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm2MB
repeat_two_match_repeat_encodeBlockAsm2MB:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm2MB
repeat_one_match_repeat_encodeBlockAsm2MB:
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm2MB:
// nextEmit = s; continue searching from the end of the match.
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm2MB
no_repeat_found_encodeBlockAsm2MB:
// Candidate 1: 4 bytes at src[SI] against the 4 bytes at s.
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm2MB
// Candidate 2: 4 bytes at src[R8] against the 4 bytes at s+1. SI is reloaded
// with the table entry for the hash of s+2 before that slot is overwritten.
SHRQ $0x08, DI
MOVL (AX)(R10*4), SI
LEAL 2(DX), R9
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm2MB
// Record s+2 in its slot, then candidate 3: src[SI] against the bytes at s+2.
MOVL R9, (AX)(R10*4)
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm2MB
// Nothing matched: advance s to the position saved in 20(SP).
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm2MB
candidate3_match_encodeBlockAsm2MB:
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm2MB
candidate2_match_encodeBlockAsm2MB:
MOVL R9, (AX)(R10*4)
INCL DX
MOVL R8, SI
candidate_match_encodeBlockAsm2MB:
// Here DX = match position and SI = candidate position. Extend backwards
// while the preceding bytes agree, bounded by nextEmit (DI) and candidate 0.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm2MB
match_extend_back_loop_encodeBlockAsm2MB:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm2MB
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm2MB
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm2MB
JMP match_extend_back_loop_encodeBlockAsm2MB
match_extend_back_end_encodeBlockAsm2MB:
// Return 0 if the dst pointer has reached the dst limit.
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
// R8 = match start. Store the new repeat offset (s - candidate), skip the 4
// bytes already known to match and measure the rest.
// DI = bytes left in src, R9 = &src[s], SI = &src[candidate].
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes, same scheme as the other matchLen sections.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm2MB
matchlen_loopback_16_match_nolit_encodeBlockAsm2MB:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm2MB
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm2MB
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm2MB:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm2MB
JMP matchlen_match8_match_nolit_encodeBlockAsm2MB
matchlen_bsf_16match_nolit_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm2MB
matchlen_match8_match_nolit_encodeBlockAsm2MB:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm2MB
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm2MB
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm2MB
matchlen_bsf_8_match_nolit_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm2MB
matchlen_match4_match_nolit_encodeBlockAsm2MB:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm2MB
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm2MB
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm2MB:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm2MB
JB match_nolit_end_encodeBlockAsm2MB
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm2MB
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm2MB
matchlen_match1_match_nolit_encodeBlockAsm2MB:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm2MB
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm2MB:
// s += extension; R11 = full match length (extension + 4); SI = offset.
// R8 becomes the literal count (match start - old nextEmit); nextEmit = s.
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm2MB
// DI = pointer to the pending literals. Only 1-3 literals with an offset of
// at least 64 take the fused literal+copy forms below; everything else emits
// the literals separately first.
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm2MB
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm2MB
// DI = the literal bytes themselves (4 loaded, 1-3 of them used).
MOVL (DI), DI
CMPL SI, $0x0001003f
JBE match_emit_copy2lits_encodeBlockAsm2MB
// emitCopy3
// 32-bit header = ((offset-65536) << 11) + 7 + (literal count << 3), with the
// length field in bits 5-10: length-4 itself when <= 60, else 61/62/63
// (0x7a0/0x7c0/0x7e0) followed by 1/2/3 bytes holding length-4-60.
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
LEAL 7(SI)(R8*8), SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBlockAsm2MB
LEAL -60(R11), R9
CMPL R11, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBlockAsm2MB
CMPL R11, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBlockAsm2MB
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBlockAsm2MB
emit_copy3_2_match_emit_lits_encodeBlockAsm2MB:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBlockAsm2MB
emit_copy3_1_match_emit_lits_encodeBlockAsm2MB:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBlockAsm2MB
emit_copy3_0_match_emit_lits_encodeBlockAsm2MB:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBlockAsm2MB:
// Store 4 literal bytes but advance CX by the literal count (R8) only.
MOVL DI, (CX)
ADDQ R8, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
match_emit_copy2lits_encodeBlockAsm2MB:
// emitCopy2WithLits
// Writes a tag byte, the 16-bit value offset-64, then the literals.
// The tag carries min(length-4, 7); when length-4 >= 7 the remaining
// length-11 is kept in R9 and emitted as a repeat afterwards.
// Tag = 3 + 8*((literal count - 1) + 4*min(length-4, 7)).
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm2MB
// emitRepeat
// Same layout as the other emitRepeat sections, for the length left in R9.
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm2MB
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm2MB
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm2MB
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_three_match_emit_repeat_copy2_encodeBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_two_match_emit_repeat_copy2_encodeBlockAsm2MB:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_one_match_emit_repeat_copy2_encodeBlockAsm2MB:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
match_emit_lits_copy_encodeBlockAsm2MB:
// Separate literal emit: return 0 if dst + 4 + literal count would reach
// the dst limit.
LEAQ 4(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
// R8 = literal count, DI = literal source pointer. Header layout is the same
// as in the other emitLiteral sections.
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm2MB
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm2MB
CMPL R9, $0x00010000
JB three_bytes_match_emit_encodeBlockAsm2MB
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm2MB
three_bytes_match_emit_encodeBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm2MB
two_bytes_match_emit_encodeBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm2MB
JMP memmove_long_match_emit_encodeBlockAsm2MB
one_byte_match_emit_encodeBlockAsm2MB:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
// Counts up to 16 are copied as one full 16-byte load/store.
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm2MB
emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm2MB
emit_lit_memmove_match_emit_encodeBlockAsm2MB_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm2MB:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm2MB
memmove_midmatch_emit_encodeBlockAsm2MB:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm2MB_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm2MB
emit_lit_memmove_mid_match_emit_encodeBlockAsm2MB_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm2MB:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm2MB
memmove_long_match_emit_encodeBlockAsm2MB:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
// X0-X3 keep the first and last 32 source bytes and are stored last.
// R13 = 64 - (dst & 31), so the MOVOA stores below hit 32-byte aligned
// addresses.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm2MBlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
match_nolits_copy_encodeBlockAsm2MB:
// Copy without fused literals: SI = offset, R11 = match length.
// Offsets above 0x1003f use emitCopy3, offsets above 0x400 use emitCopy2,
// the rest use the short two-byte-header forms.
// emitCopy
CMPL SI, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBlockAsm2MB
// emitCopy3
// 32-bit header = ((offset-65536) << 11) + 7, with the length field in bits
// 5-10: length-4 itself when <= 60, else 61/62/63 followed by 1/2/3 bytes
// holding length-4-60.
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
ADDL $0x07, SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_nolit_encodeBlockAsm2MB_emit3
LEAL -60(R11), DI
CMPL R11, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBlockAsm2MB_emit3
CMPL R11, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBlockAsm2MB_emit3
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL DI, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy3_2_match_nolit_encodeBlockAsm2MB_emit3:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW DI, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy3_1_match_nolit_encodeBlockAsm2MB_emit3:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB DI, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy3_0_match_nolit_encodeBlockAsm2MB_emit3:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
two_byte_offset_match_nolit_encodeBlockAsm2MB:
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm2MB
// Offset <= 1024. Length < 19: a single 16-bit word,
// ((offset-1) << 6) + 4*length - 15.
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm2MB
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_one_longer_match_nolit_encodeBlockAsm2MB:
// Length 19..273: 16-bit word ((offset-1) << 6) + 61, then one byte length-18.
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm2MB
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy1_repeat_match_nolit_encodeBlockAsm2MB:
// Length >= 274: 16-bit word ((offset-1) << 6) + 57, then the remaining
// length-18 is emitted as a repeat.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
// Same layout as the other emitRepeat sections, for the length left in R11.
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm2MB:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
two_byte_match_nolit_encodeBlockAsm2MB:
// emitCopy2
// Tag byte, then the 16-bit value offset-64 at dst+1. With L = length-4:
//   L <= 60     : tag 2 + 4*L
//   < 0x13c     : tag 0xf6 + 1 byte  L-60
//   < 0x1003c   : tag 0xfa + 2 bytes L-60
//   otherwise   : tag 0xfe + 3 bytes L-60 (MOVL stores 4, CX advances past 3)
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm2MB_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm2MB_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm2MB_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy2_2_match_nolit_encodeBlockAsm2MB_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy2_1_match_nolit_encodeBlockAsm2MB_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm2MB
emit_copy2_0_match_nolit_encodeBlockAsm2MB_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm2MB:
// After a match: finish if s reached the search limit, otherwise index the
// positions s-2 and s and test for an immediate match at s.
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm2MB
// DI = 8 bytes at src[s-2].
MOVQ -2(BX)(DX*1), DI
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm2MB
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm2MB:
// R8 = hash of the bytes at s-2, R9 = hash of the bytes at s (DI >> 16).
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x10, R8
IMULQ SI, R8
SHRQ $0x31, R8
SHLQ $0x10, R9
IMULQ SI, R9
SHRQ $0x31, R9
// SI = candidate for s; store s-2 and s in their slots.
LEAL -2(DX), R10
MOVL (AX)(R9*4), SI
MOVL R10, (AX)(R8*4)
MOVL DX, (AX)(R9*4)
// R8 keeps s; DX becomes s+1 for the search loop if the candidate fails.
MOVL DX, R8
INCL DX
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm2MB
// Immediate match at s with no pending literals: new repeat offset is
// s - candidate.
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
// DX was already incremented once, so +3 skips the 4 matched bytes.
// DI = bytes left in src, R8 = &src[s], SI = &src[candidate].
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
// R11 counts equal bytes, same scheme as the other matchLen sections.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm2MB
matchlen_loopback_16_match_nolit2_encodeBlockAsm2MB:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm2MB
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm2MB
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm2MB:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm2MB
JMP matchlen_match8_match_nolit2_encodeBlockAsm2MB
matchlen_bsf_16match_nolit2_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm2MB
matchlen_match8_match_nolit2_encodeBlockAsm2MB:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm2MB
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm2MB
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm2MB
matchlen_bsf_8_match_nolit2_encodeBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm2MB
matchlen_match4_match_nolit2_encodeBlockAsm2MB:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm2MB
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm2MB
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm2MB:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm2MB
JB match_nolit2_end_encodeBlockAsm2MB
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm2MB
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm2MB
matchlen_match1_match_nolit2_encodeBlockAsm2MB:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm2MB
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm2MB:
// s += extension; R11 = full length; nextEmit = s; SI = offset; emit the copy.
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm2MB
emit_remainder_encodeBlockAsm2MB:
// Flush the trailing literals: AX = src_len - nextEmit (the hash table
// pointer in AX is no longer needed), DX = pointer to the first literal.
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm2MB
LEAQ (BX)(DX*1), DX
// Return 0 if dst + 4 + count would reach the dst limit.
LEAQ 4(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
// AX = literal count; header layout as in the other emitLiteral sections.
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm2MB
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm2MB
CMPL BX, $0x00010000
JB three_bytes_emit_remainder_encodeBlockAsm2MB
MOVL BX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW BX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm2MB
three_bytes_emit_remainder_encodeBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm2MB
two_bytes_emit_remainder_encodeBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm2MB
JMP memmove_long_emit_remainder_encodeBlockAsm2MB
one_byte_emit_remainder_encodeBlockAsm2MB:
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
// BX = dst pointer after the literals.
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
// With no margin every size class copies exactly AX bytes, using a head and
// a tail access that may overlap.
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_1or2:
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_4through8:
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_8through16:
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBlockAsm2MB_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm2MB:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm2MB
memmove_midemit_remainder_encodeBlockAsm2MB:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm2MB_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm2MB
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm2MB_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm2MB:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm2MB
memmove_long_emit_remainder_encodeBlockAsm2MB:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
// X0-X3 keep the first and last 32 source bytes and are stored last.
// R8 = 64 - (dst & 31), so the MOVOA stores below hit 32-byte aligned
// addresses.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm2MBlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm2MB:
// Return the number of bytes written: dst write pointer - dst base.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm512K(dst []byte, src []byte, tmp *[65536]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBlockAsm512K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00000200, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm512K:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm512K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm512K:
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm512K
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x32, R11
MOVL (AX)(R10*4), SI
MOVL (AX)(R11*4), R8
MOVL DX, (AX)(R10*4)
MOVL DX, (AX)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm512K
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeBlockAsm512K
repeat_extend_back_loop_encodeBlockAsm512K:
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm512K
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm512K
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm512K
repeat_extend_back_end_encodeBlockAsm512K:
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
LEAQ 4(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
LEAQ (BX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm512K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm512K
CMPL R9, $0x00010000
JB three_bytes_repeat_emit_lits_encodeBlockAsm512K
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm512K
three_bytes_repeat_emit_lits_encodeBlockAsm512K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm512K
two_bytes_repeat_emit_lits_encodeBlockAsm512K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm512K
JMP memmove_long_repeat_emit_lits_encodeBlockAsm512K
one_byte_repeat_emit_lits_encodeBlockAsm512K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm512K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm512K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm512K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm512K
memmove_midrepeat_emit_lits_encodeBlockAsm512K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm512K
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm512K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm512K
memmove_long_repeat_emit_lits_encodeBlockAsm512K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm512Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm512K:
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// Counts how many bytes at (R9) and (SI) are equal, looking at no more than
// R8 bytes. The two pointers and the byte budget are prepared by the
// instructions immediately above. The count is accumulated in R11; R8 is
// decremented as bytes are consumed. R10 and R12 are scratch.
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm512K
matchlen_loopback_16_repeat_extend_encodeBlockAsm512K:
// 16 bytes per iteration: XOR two 8-byte words from each side; a non-zero
// result marks the first differing word.
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm512K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm512K
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm512K:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm512K
JMP matchlen_match8_repeat_extend_encodeBlockAsm512K
matchlen_bsf_16repeat_extend_encodeBlockAsm512K:
// Mismatch in the second word: index of the lowest set bit divided by 8 is
// the number of equal bytes in that word (loads are little-endian); add the
// 8 bytes of the first word that already matched.
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm512K
matchlen_match8_repeat_extend_encodeBlockAsm512K:
// Fewer than 16 bytes left: try a single 8-byte word.
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm512K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm512K
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm512K
matchlen_bsf_8_repeat_extend_encodeBlockAsm512K:
// Mismatch inside the word held in R10: add the count of equal low bytes.
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm512K
matchlen_match4_repeat_extend_encodeBlockAsm512K:
// Tail handling: 4 bytes, then 2, then 1. A failed 4- or 2-byte compare falls
// through to the narrower compare instead of locating the exact byte.
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm512K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm512K
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm512K:
// R8 == 1 -> single byte compare; R8 == 0 -> done; otherwise compare 2 bytes.
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm512K
JB repeat_extend_forward_end_encodeBlockAsm512K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm512K
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm512K
matchlen_match1_repeat_extend_encodeBlockAsm512K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm512K
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm512K:
// R11 is the byte count produced by matchLen above; advance the position in
// DX past those bytes.
ADDL R11, DX
// SI = DX - DI. DI is set before this excerpt; SI is the length handed to
// emitRepeat below.
MOVL DX, SI
SUBL DI, SI
// The value loaded here is overwritten by the LEAL that follows before it is
// read.
MOVL 16(SP), DI
// emitRepeat
// Writes a repeat op of length SI at CX, choosing the shortest form:
//   SI <= 29      1 byte:  SI*8 - 4
//   SI <  286     2 bytes: 0xec, SI-30 (8 bits)
//   SI <  65566   3 bytes: 0xf4, SI-30 (16 bits)
//   otherwise     4 bytes: 0xfc, SI-30 (low 24 bits)
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm512K
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm512K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm512K
// MOVL stores four bytes at 1(CX) but CX only advances by 4 in total, so the
// top byte of DI lands in space that the next write to CX reuses.
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm512K
repeat_three_match_repeat_encodeBlockAsm512K:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm512K
repeat_two_match_repeat_encodeBlockAsm512K:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm512K
repeat_one_match_repeat_encodeBlockAsm512K:
// DI = SI*8 - 4; the XORL clears DI so the LEAL base contributes nothing.
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm512K:
// Everything up to DX has now been emitted; record that in 12(SP) and resume
// searching.
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm512K
no_repeat_found_encodeBlockAsm512K:
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm512K
SHRQ $0x08, DI
MOVL (AX)(R10*4), SI
LEAL 2(DX), R9
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm512K
MOVL R9, (AX)(R10*4)
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm512K
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm512K
candidate3_match_encodeBlockAsm512K:
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm512K
candidate2_match_encodeBlockAsm512K:
MOVL R9, (AX)(R10*4)
INCL DX
MOVL R8, SI
candidate_match_encodeBlockAsm512K:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm512K
match_extend_back_loop_encodeBlockAsm512K:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm512K
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm512K
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm512K
JMP match_extend_back_loop_encodeBlockAsm512K
match_extend_back_end_encodeBlockAsm512K:
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm512K
matchlen_loopback_16_match_nolit_encodeBlockAsm512K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm512K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm512K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm512K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm512K
JMP matchlen_match8_match_nolit_encodeBlockAsm512K
matchlen_bsf_16match_nolit_encodeBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm512K
matchlen_match8_match_nolit_encodeBlockAsm512K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm512K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm512K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm512K
matchlen_bsf_8_match_nolit_encodeBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm512K
matchlen_match4_match_nolit_encodeBlockAsm512K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm512K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm512K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm512K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm512K
JB match_nolit_end_encodeBlockAsm512K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm512K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm512K
matchlen_match1_match_nolit_encodeBlockAsm512K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm512K
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm512K:
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm512K
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm512K
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm512K
MOVL (DI), DI
CMPL SI, $0x0001003f
JBE match_emit_copy2lits_encodeBlockAsm512K
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
LEAL 7(SI)(R8*8), SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBlockAsm512K
LEAL -60(R11), R9
CMPL R11, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBlockAsm512K
CMPL R11, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBlockAsm512K
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBlockAsm512K
emit_copy3_2_match_emit_lits_encodeBlockAsm512K:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBlockAsm512K
emit_copy3_1_match_emit_lits_encodeBlockAsm512K:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBlockAsm512K
emit_copy3_0_match_emit_lits_encodeBlockAsm512K:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBlockAsm512K:
MOVL DI, (CX)
ADDQ R8, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
match_emit_copy2lits_encodeBlockAsm512K:
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm512K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm512K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm512K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm512K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_three_match_emit_repeat_copy2_encodeBlockAsm512K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_two_match_emit_repeat_copy2_encodeBlockAsm512K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_one_match_emit_repeat_copy2_encodeBlockAsm512K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
match_emit_lits_copy_encodeBlockAsm512K:
LEAQ 4(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm512K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm512K
CMPL R9, $0x00010000
JB three_bytes_match_emit_encodeBlockAsm512K
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm512K
three_bytes_match_emit_encodeBlockAsm512K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm512K
two_bytes_match_emit_encodeBlockAsm512K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm512K
JMP memmove_long_match_emit_encodeBlockAsm512K
one_byte_match_emit_encodeBlockAsm512K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm512K
emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm512K
emit_lit_memmove_match_emit_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm512K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm512K
memmove_midmatch_emit_encodeBlockAsm512K:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm512K
emit_lit_memmove_mid_match_emit_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm512K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm512K
memmove_long_match_emit_encodeBlockAsm512K:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm512Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
match_nolits_copy_encodeBlockAsm512K:
// emitCopy
CMPL SI, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBlockAsm512K
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(SI), SI
SHLL $0x0b, SI
ADDL $0x07, SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_nolit_encodeBlockAsm512K_emit3
LEAL -60(R11), DI
CMPL R11, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBlockAsm512K_emit3
CMPL R11, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBlockAsm512K_emit3
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL DI, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy3_2_match_nolit_encodeBlockAsm512K_emit3:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW DI, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy3_1_match_nolit_encodeBlockAsm512K_emit3:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB DI, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy3_0_match_nolit_encodeBlockAsm512K_emit3:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
two_byte_offset_match_nolit_encodeBlockAsm512K:
// Copy with offset SI and length R11 (both prepared by the code that jumps to
// match_nolits_copy). Offsets above 1024 use the emitCopy2 path; offsets up to
// 1024 are encoded here.
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm512K
// Lengths below 19 fit in a 2-byte op:
//   word = ((offset-1) << 6) + 4*length - 15
// which equals ((offset-1) << 6) | ((length-4) << 2) | 1 when length >= 4.
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm512K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_one_longer_match_nolit_encodeBlockAsm512K:
// Lengths 19..273 use a 3-byte op: word = ((offset-1) << 6) + 61, followed by
// one byte holding length-18. Longer matches go to the copy-plus-repeat path.
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm512K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy1_repeat_match_nolit_encodeBlockAsm512K:
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm512K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
two_byte_match_nolit_encodeBlockAsm512K:
// emitCopy2
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm512K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm512K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm512K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy2_2_match_nolit_encodeBlockAsm512K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy2_1_match_nolit_encodeBlockAsm512K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm512K
emit_copy2_0_match_nolit_encodeBlockAsm512K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm512K:
// A copy has just been written. DX is the position after the match; stop
// searching once it reaches the limit kept in 8(SP).
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm512K
// DI = 8 bytes of src starting two bytes before DX (BX is the base used for
// all src indexing in this excerpt).
MOVQ -2(BX)(DX*1), DI
// Give up with a return value of 0 if the dst pointer has reached the limit
// stored at (SP).
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm512K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm512K:
// Hash two positions. Each hash shifts out the top 16 bits of its input,
// multiplies by the constant in SI and keeps the top 14 bits (64 - 0x32).
//   R8 = hash of the bytes at DX-2
//   R9 = hash of the bytes at DX (DI is shifted right by 16 first)
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x10, R8
IMULQ SI, R8
SHRQ $0x32, R8
SHLQ $0x10, R9
IMULQ SI, R9
SHRQ $0x32, R9
// Table entries are 32 bits wide, indexed from AX. Read the previous entry for
// the hash at DX into SI, then record DX-2 and DX in their slots.
LEAL -2(DX), R10
MOVL (AX)(R9*4), SI
MOVL R10, (AX)(R8*4)
MOVL DX, (AX)(R9*4)
// Keep the matched position in R8 and step DX so that a failed compare
// resumes the search one byte further on.
MOVL DX, R8
INCL DX
// Compare 4 bytes at the candidate with the 4 bytes at the old DX.
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm512K
// Immediate match: 16(SP) = position - candidate, used as the offset below.
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm512K
matchlen_loopback_16_match_nolit2_encodeBlockAsm512K:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm512K
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm512K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm512K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm512K
JMP matchlen_match8_match_nolit2_encodeBlockAsm512K
matchlen_bsf_16match_nolit2_encodeBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm512K
matchlen_match8_match_nolit2_encodeBlockAsm512K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm512K
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm512K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm512K
matchlen_bsf_8_match_nolit2_encodeBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm512K
matchlen_match4_match_nolit2_encodeBlockAsm512K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm512K
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm512K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm512K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm512K
JB match_nolit2_end_encodeBlockAsm512K
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm512K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm512K
matchlen_match1_match_nolit2_encodeBlockAsm512K:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm512K
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm512K:
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm512K
emit_remainder_encodeBlockAsm512K:
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm512K
LEAQ (BX)(DX*1), DX
LEAQ 4(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm512K
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm512K
CMPL BX, $0x00010000
JB three_bytes_emit_remainder_encodeBlockAsm512K
MOVL BX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW BX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm512K
three_bytes_emit_remainder_encodeBlockAsm512K:
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm512K
two_bytes_emit_remainder_encodeBlockAsm512K:
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm512K
JMP memmove_long_emit_remainder_encodeBlockAsm512K
one_byte_emit_remainder_encodeBlockAsm512K:
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_1or2:
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_4through8:
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_8through16:
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm512K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm512K
memmove_midemit_remainder_encodeBlockAsm512K:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm512K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm512K
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm512K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm512K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm512K
memmove_long_emit_remainder_encodeBlockAsm512K:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm512Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm512K:
// Result = current dst pointer (CX) minus dst_base, i.e. the number of bytes
// written to dst.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm64K(dst []byte, src []byte, tmp *[16384]byte) int
// Requires: BMI, CMOV, SSE2
//
// Frame: 24 bytes of locals, 64 bytes of arguments plus result.
// Registers set up by this prologue:
//   AX = tmp (used as the hash table), CX = dst write pointer,
//   BX = src base, DX = current src position (starts at 1).
// Stack slots initialised by this prologue:
//   (SP)    dst limit: dst_base + (src_len - 17) - (src_len >> 5)
//   8(SP)   src position limit: src_len - 17
//   12(SP)  0; read by later code as the start of the literals not yet emitted
//   16(SP)  1; read by later code as the distance back to the repeat candidate
// 20(SP) is first written inside the search loop.
// The function stores 0 in ret and returns when a dst limit check fails.
TEXT ·encodeBlockAsm64K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Clear tmp: 0x80 iterations of 0x80 bytes each = 16384 bytes.
MOVQ $0x00000080, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm64K:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm64K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), DX
// BX and SI both start as src_len - 17; SI becomes the search limit, BX is
// reduced by src_len >> 5 and rebased onto dst to form the dst limit.
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm64K:
// SI = DX + 4 + ((DX - 12(SP)) >> 5): the position to move to if nothing
// matches at DX. The step grows with the distance from the position in 12(SP).
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(DX)(SI*1), SI
// Stop searching once that next position reaches the limit in 8(SP).
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm64K
// DI = 8 bytes of src at DX; remember the next position in 20(SP).
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
// R10 = hash of DI, R11 = hash of DI >> 8. Each hash shifts out the top 16
// bits, multiplies by the constant in R9 and keeps the top 13 bits
// (64 - 0x33), giving an index into a table of 16-bit entries.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x33, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x33, R11
// Load the previous entries (zero-extended) into SI and R8, then overwrite
// both slots with the low 16 bits of DX.
// NOTE(review): the R11 slot is keyed by the bytes starting at DX+1 yet
// records DX — confirm against the generator that this is intended.
MOVWLZX (AX)(R10*2), SI
MOVWLZX (AX)(R11*2), R8
MOVW DX, (AX)(R10*2)
MOVW DX, (AX)(R11*2)
// R10 = hash of DI >> 16, left in R10 for code after this block.
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x33, R10
// Repeat check: compare the 4 bytes at DX+1 (low 32 bits of DI >> 8) with the
// 4 bytes at DX + 1 - 16(SP).
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm64K
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeBlockAsm64K
repeat_extend_back_loop_encodeBlockAsm64K:
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm64K
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm64K
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm64K
repeat_extend_back_end_encodeBlockAsm64K:
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
LEAQ 4(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
LEAQ (BX)(R8*1), R8
// emitLiteral
// Writes the header for SI literal bytes at CX. R9 = SI - 1. Values below 29
// are carried in the tag byte itself (one_byte path). Otherwise R9 - 29 is
// stored after a tag of 0xe8 (1 byte), 0xf0 (2 bytes) or 0xf8 (3 bytes).
// Each path restores R9 to SI - 1 by adding 29 back before jumping to the
// memmove code.
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm64K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm64K
// Compare against 1<<16 before branching, as the 512K variant does. Without
// this compare the JB below re-tested the carry flag left by the compare
// against 0x100, which is always clear at this point, so the 0xf0 form was
// unreachable and every run of 286 or more literals got the longer 0xf8 header.
CMPL R9, $0x00010000
JB three_bytes_repeat_emit_lits_encodeBlockAsm64K
// 0xf8 tag followed by the low 24 bits of R9 (16 bits via MOVW, then bits
// 16..23 via R10).
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm64K
three_bytes_repeat_emit_lits_encodeBlockAsm64K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm64K
two_bytes_repeat_emit_lits_encodeBlockAsm64K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
// Runs with SI - 1 below 64 use the short memmove; longer runs use the long one.
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm64K
JMP memmove_long_repeat_emit_lits_encodeBlockAsm64K
one_byte_repeat_emit_lits_encodeBlockAsm64K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm64K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm64K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm64K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm64K
memmove_midrepeat_emit_lits_encodeBlockAsm64K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm64K
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm64K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm64K
memmove_long_repeat_emit_lits_encodeBlockAsm64K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm64Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
// Repeat path: extend the match forward and count how many further bytes agree.
// BX is the base that every source offset in this function is added to, DX is
// the current source position and 16(SP) holds the current match offset.
// NOTE(review): the +5 presumably skips the byte at s plus the 4 bytes already
// verified at s+1 by the repeat check earlier in this function (not visible in
// this part of the file) - confirm against the search loop.
repeat_emit_lits_end_encodeBlockAsm64K:
ADDL $0x05, DX
// SI = DX - offset: position of the earlier data being matched against.
MOVL DX, SI
SUBL 16(SP), SI
// R8 = src_len - DX: number of source bytes still available for comparison.
MOVQ src_len+32(FP), R8
SUBL DX, R8
// R9 = BX + DX (current data), SI = BX + (DX - offset) (earlier data).
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 accumulates the number of matching bytes; R8 counts bytes left to test.
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm64K
// 16 bytes per iteration: XOR two 8-byte words; a non-zero result means a
// mismatch inside that word.
matchlen_loopback_16_repeat_extend_encodeBlockAsm64K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm64K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm64K
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm64K:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm64K
JMP matchlen_match8_repeat_extend_encodeBlockAsm64K
// Mismatch in the second word: the index of the lowest set bit divided by 8 is
// the number of matching bytes in that word (loads are little-endian), and the
// first word (8 bytes) matched completely.
matchlen_bsf_16repeat_extend_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm64K
// Fewer than 16 bytes left: try one 8-byte word.
matchlen_match8_repeat_extend_encodeBlockAsm64K:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm64K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm64K
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm64K
// Mismatch in the word held in R10: add the count of matching low bytes.
matchlen_bsf_8_repeat_extend_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm64K
// Tail handling: 4 bytes, then 2 bytes, then 1 byte. A failed 4- or 2-byte
// compare falls through to the next smaller compare without advancing.
matchlen_match4_repeat_extend_encodeBlockAsm64K:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm64K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm64K
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm64K:
// Exactly one byte left -> single-byte compare; zero bytes left -> done.
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm64K
JB repeat_extend_forward_end_encodeBlockAsm64K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm64K
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm64K
matchlen_match1_repeat_extend_encodeBlockAsm64K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm64K
LEAL 1(R11), R11
// Forward extension finished: R11 holds the number of extra matching bytes.
// Emits a repeat operation for the whole match and resumes searching.
// NOTE(review): DI is expected to hold the start position of the repeat match
// on entry; it is set before the visible part of this function - confirm.
repeat_extend_forward_end_encodeBlockAsm64K:
// DX = end of match; SI = DX - DI = repeat length.
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
// This load is overwritten by the LEAL that follows before DI is read again,
// so it has no effect on the output.
MOVL 16(SP), DI
// emitRepeat
// Length SI selects one of four encodings:
//   SI <= 29          : 1 byte
//   SI <  0x11e (286) : tag 0xec + 1 byte  of (SI - 30)
//   SI <  0x1001e     : tag 0xf4 + 2 bytes of (SI - 30)
//   otherwise         : tag 0xfc + 3 bytes of (SI - 30)
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm64K
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm64K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm64K
MOVB $0xfc, (CX)
// 4-byte store, but CX only advances by 4 in total, so the top byte of DI is
// overwritten by whatever is emitted next.
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm64K
repeat_three_match_repeat_encodeBlockAsm64K:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm64K
repeat_two_match_repeat_encodeBlockAsm64K:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm64K
repeat_one_match_repeat_encodeBlockAsm64K:
// Tag byte = SI*8 - 4, i.e. ((SI - 1) << 3) | 4.
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm64K:
// Everything up to DX is now encoded: 12(SP) marks where pending literals start.
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm64K
// No repeat match at this position: test up to three table candidates.
// NOTE(review): register contents on entry come from the search loop, which is
// outside this view. By analogy with encodeBlockAsm16K later in this file they
// are presumably: DI = 8 source bytes loaded at s (DX), SI = candidate for s,
// R8 = candidate for s+1, R10 = table index for s+2 - confirm.
no_repeat_found_encodeBlockAsm64K:
// Candidate 1: compare 4 bytes at BX+SI with the low 4 bytes of DI.
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm64K
// Shift the loaded bytes down by one so DI starts one byte further on.
SHRQ $0x08, DI
// Fetch the third candidate from the table (16-bit entries) before the slot is
// overwritten with DX+2 below.
MOVWLZX (AX)(R10*2), SI
LEAL 2(DX), R9
// Candidate 2: compare 4 bytes at BX+R8 with the shifted bytes.
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm64K
MOVW R9, (AX)(R10*2)
// Candidate 3: shift by one more byte and compare with 4 bytes at BX+SI.
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm64K
// Nothing matched: continue the search at the position saved in 20(SP).
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm64K
candidate3_match_encodeBlockAsm64K:
// Match is at DX+2; SI already holds the candidate.
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm64K
candidate2_match_encodeBlockAsm64K:
// Match is at DX+1 with candidate R8; the table slot is still updated with DX+2.
MOVW R9, (AX)(R10*2)
INCL DX
MOVL R8, SI
// A 4-byte match was found: DX = match position, SI = candidate position.
// First extend the match backwards over bytes not yet emitted, then measure
// how far it extends forwards.
candidate_match_encodeBlockAsm64K:
// DI = 12(SP), the start of the pending (unemitted) literals.
MOVL 12(SP), DI
// A candidate at position 0 has nothing before it to compare.
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm64K
match_extend_back_loop_encodeBlockAsm64K:
// Stop once DX has reached the start of the pending literals.
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm64K
// Compare the bytes immediately preceding both positions.
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm64K
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm64K
JMP match_extend_back_loop_encodeBlockAsm64K
match_extend_back_end_encodeBlockAsm64K:
// Give up and return 0 when the output pointer CX has reached the limit in (SP).
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
// R8 keeps the match start; it is used after matchLen to count the literals.
MOVL DX, R8
// 16(SP) = DX - SI: the offset of this match.
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Skip the 4 bytes that are already known to match.
ADDL $0x04, DX
ADDL $0x04, SI
// DI = src_len - DX: bytes still available for comparison.
MOVQ src_len+32(FP), DI
SUBL DX, DI
// R9 = BX + DX (current data), SI = BX + SI (earlier data).
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 accumulates the number of additional matching bytes.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm64K
matchlen_loopback_16_match_nolit_encodeBlockAsm64K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm64K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm64K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm64K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm64K
JMP matchlen_match8_match_nolit_encodeBlockAsm64K
matchlen_bsf_16match_nolit_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm64K
matchlen_match8_match_nolit_encodeBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm64K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm64K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm64K
matchlen_bsf_8_match_nolit_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm64K
matchlen_match4_match_nolit_encodeBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm64K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm64K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm64K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm64K
JB match_nolit_end_encodeBlockAsm64K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm64K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm64K
matchlen_match1_match_nolit_encodeBlockAsm64K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm64K
LEAL 1(R11), R11
// matchLen finished: R11 = extra matching bytes, R8 = match start position.
// Decides between a fused "copy with literals" operation and separate
// literal + copy operations.
match_nolit_end_encodeBlockAsm64K:
// DX = end of match; R11 = total match length including the first 4 bytes.
ADDL R11, DX
ADDL $0x04, R11
// SI = match offset, DI = start of pending literals; 12(SP) advances to DX.
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
// R8 = match start - literal start = number of pending literal bytes.
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm64K
// DI = address of the first pending literal byte.
LEAQ (BX)(DI*1), DI
// The fused form is only used for 1..3 literals and an offset of at least 64.
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm64K
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm64K
// Load 4 bytes of literals; only the first R8 of them end up in the output.
MOVL (DI), DI
// emitCopy2WithLits
// R9 = match length left over for a following repeat (0 = none).
XORQ R9, R9
// The 16-bit field at 1(CX) stores offset - 64.
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
// The copy carries at most 7 (i.e. length - 4 capped at 7); when length - 4 is
// 7 or more, the remaining length - 11 bytes go to R9.
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
// Tag byte = 3 + 8*((literals - 1) + 4*copyLen)
//          = 3 | (literals - 1) << 3 | copyLen << 5.
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
// Store all 4 loaded bytes, then advance by the real literal count so the
// surplus bytes are overwritten by later output.
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm64K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm64K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm64K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm64K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_three_match_emit_repeat_copy2_encodeBlockAsm64K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_two_match_emit_repeat_copy2_encodeBlockAsm64K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_one_match_emit_repeat_copy2_encodeBlockAsm64K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
// Emit the R8 pending literal bytes (source address in DI) as a separate
// literal operation before the copy.
match_emit_lits_copy_encodeBlockAsm64K:
// Require room for the literals plus 4 bytes (the largest header written
// below); otherwise return 0.
LEAQ 4(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
// R9 = count - 1. Values below 29 fit in the one-byte header.
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm64K
// R9 = count - 30 for the longer headers.
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm64K
// NOTE(review): this JB tests the same flags as the JB above, which was not
// taken, so it can never be taken here. Every count with R9 >= 0x100 therefore
// falls through to the 4-byte header, and the three_bytes label below is not
// reached from this branch (no other reference is visible in this part of the
// file). The output is still valid, just one byte longer than the 3-byte form.
JB three_bytes_match_emit_encodeBlockAsm64K
// 4-byte header: tag 0xf8 followed by the low 24 bits of R9.
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm64K
three_bytes_match_emit_encodeBlockAsm64K:
// 3-byte header: tag 0xf0 followed by 16 bits of R9.
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm64K
two_bytes_match_emit_encodeBlockAsm64K:
// 2-byte header: tag 0xe8 followed by 8 bits of R9.
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
// Restore R9 = count - 1 and choose the copy routine: up to 64 bytes uses the
// mid-size move, anything larger the long move.
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm64K
JMP memmove_long_match_emit_encodeBlockAsm64K
one_byte_match_emit_encodeBlockAsm64K:
// 1-byte header: (count - 1) << 3.
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
// R9 = output position after the literals have been copied.
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm64K
emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm64K
emit_lit_memmove_match_emit_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm64K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm64K
memmove_midmatch_emit_encodeBlockAsm64K:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm64K
emit_lit_memmove_mid_match_emit_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm64K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm64K
memmove_long_match_emit_encodeBlockAsm64K:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm64Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
// Emit the copy operation for the current match.
// SI = match offset, R11 = match length, CX = output pointer.
match_nolits_copy_encodeBlockAsm64K:
// emitCopy
// Offsets above 1024 use the form with a separate 16-bit offset (emitCopy2).
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm64K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm64K
// Length below 19: 2-byte form.
// Value = (offset - 1) << 6 + length*4 - 15
//       = (offset - 1) << 6 | (length - 4) << 2 | 1.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
emit_one_longer_match_nolit_encodeBlockAsm64K:
// Length 19..273: 3-byte form. The length field is 15 (61 = 15<<2 | 1) and an
// extra byte carries length - 18.
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm64K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
emit_copy1_repeat_match_nolit_encodeBlockAsm64K:
// Length 274 or more: emit a 2-byte copy with length field 14
// (57 = 14<<2 | 1, i.e. 18 bytes), then a repeat for the remaining length - 18.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
// Remaining length R11 selects a 1-, 2-, 3- or 4-byte repeat encoding.
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K
MOVB $0xfc, (CX)
// 4-byte store; CX advances by 4 in total, so the top byte is overwritten later.
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm64K:
// Tag byte = R11*8 - 4, i.e. ((R11 - 1) << 3) | 4.
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
two_byte_match_nolit_encodeBlockAsm64K:
// emitCopy2
// Bytes 1..2 hold offset - 64; the length is encoded as length - 4.
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
// length - 4 up to 60 fits in the tag byte; larger values use 1, 2 or 3 extra
// bytes holding (length - 4) - 60.
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm64K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm64K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm64K_emit2
MOVB $0xfe, (CX)
// 4-byte store at offset 3; CX advances by 6, so only 3 of those bytes persist.
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
emit_copy2_2_match_nolit_encodeBlockAsm64K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
emit_copy2_1_match_nolit_encodeBlockAsm64K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm64K
emit_copy2_0_match_nolit_encodeBlockAsm64K_emit2:
// Tag byte = 2 + (length - 4)*4, i.e. (length - 4) << 2 | 2.
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
// A match has just been emitted and DX points at its end. Index positions
// DX-2 and DX in the table and check whether another match starts right at DX.
match_nolit_emitcopy_end_encodeBlockAsm64K:
// Past the search limit in 8(SP): only the remaining literals are left.
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm64K
// DI = 8 source bytes starting at DX-2.
MOVQ -2(BX)(DX*1), DI
// Return 0 when the output pointer has reached the limit in (SP).
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm64K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm64K:
// Hash: shift left by 16 (drops the top two bytes, so six bytes are hashed),
// multiply by the constant in SI, and keep the top 13 bits (shift right 0x33).
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ DI, R8
// After this shift DI holds the bytes starting at DX.
SHRQ $0x10, DI
MOVQ DI, R9
// R8 = table index for the bytes at DX-2.
SHLQ $0x10, R8
IMULQ SI, R8
SHRQ $0x33, R8
// R9 = table index for the bytes at DX.
SHLQ $0x10, R9
IMULQ SI, R9
SHRQ $0x33, R9
LEAL -2(DX), R10
// Read the candidate for DX before overwriting its slot; entries are 16 bits.
MOVWLZX (AX)(R9*2), SI
MOVW R10, (AX)(R8*2)
MOVW DX, (AX)(R9*2)
// R8 remembers DX; DX itself moves on by one for the search loop.
MOVL DX, R8
INCL DX
// No 4-byte match at the candidate: back to the regular search.
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm64K
// Immediate match: 16(SP) = position - candidate, the new match offset.
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm64K
matchlen_loopback_16_match_nolit2_encodeBlockAsm64K:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm64K
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm64K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm64K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm64K
JMP matchlen_match8_match_nolit2_encodeBlockAsm64K
matchlen_bsf_16match_nolit2_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm64K
matchlen_match8_match_nolit2_encodeBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm64K
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm64K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm64K
matchlen_bsf_8_match_nolit2_encodeBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm64K
matchlen_match4_match_nolit2_encodeBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm64K
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm64K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm64K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm64K
JB match_nolit2_end_encodeBlockAsm64K
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm64K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm64K
matchlen_match1_match_nolit2_encodeBlockAsm64K:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm64K
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm64K:
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm64K
// Emit everything from the pending-literal start in 12(SP) to the end of src
// as one final literal operation. AX and BX are reused as scratch registers
// from here on.
emit_remainder_encodeBlockAsm64K:
// AX = src_len - 12(SP): number of trailing literal bytes; nothing to do if 0.
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm64K
// DX = address of the first trailing literal byte.
LEAQ (BX)(DX*1), DX
// Require room for the literals plus 4 bytes (the largest header written
// below); otherwise return 0.
LEAQ 4(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
// BX = count - 1. Values below 29 fit in the one-byte header.
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm64K
// BX = count - 30 for the longer headers.
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm64K
// NOTE(review): same flags as the JB above, which was not taken, so this JB is
// never taken; counts with BX >= 0x100 always use the 4-byte header below and
// the three_bytes label is not reached from here.
JB three_bytes_emit_remainder_encodeBlockAsm64K
// 4-byte header: tag 0xf8 followed by the low 24 bits of BX.
MOVL BX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW BX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm64K
three_bytes_emit_remainder_encodeBlockAsm64K:
// 3-byte header: tag 0xf0 followed by 16 bits of BX.
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm64K
two_bytes_emit_remainder_encodeBlockAsm64K:
// 2-byte header: tag 0xe8 followed by 8 bits of BX.
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
// Restore BX = count - 1 and choose the copy routine by size.
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm64K
JMP memmove_long_emit_remainder_encodeBlockAsm64K
one_byte_emit_remainder_encodeBlockAsm64K:
// 1-byte header: (count - 1) << 3.
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
// BX = output position after the literals have been copied.
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_1or2:
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_4through8:
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_8through16:
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm64K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm64K
memmove_midemit_remainder_encodeBlockAsm64K:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm64K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm64K
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm64K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm64K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm64K
memmove_long_emit_remainder_encodeBlockAsm64K:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
// Success exit: return the number of bytes written, CX - dst_base.
emit_remainder_end_encodeBlockAsm64K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm16K(dst []byte, src []byte, tmp *[8192]byte) int
// Requires: BMI, CMOV, SSE2
//
// Returns the number of bytes written to dst, or 0 when the output would reach
// the limit computed below.
//
// Register use established in this prologue:
//   AX = tmp (table of 16-bit entries), BX = src base,
//   CX = output pointer (starts at dst base), DX = current source position.
// Stack frame (24 bytes):
//   0(SP)  output limit pointer
//   8(SP)  source position limit (src_len - 17)
//   12(SP) start of literals not yet emitted
//   16(SP) current match offset
//   20(SP) next source position to try when nothing matches
TEXT ·encodeBlockAsm16K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Clear the table: 0x40 iterations of 0x80 bytes = 8192 bytes.
MOVQ $0x00000040, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm16K:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm16K
// No literals pending yet.
MOVL $0x00000000, 12(SP)
// 8(SP) = src_len - 17: the search stops once the position reaches this.
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
// (SP) = dst_base + (src_len - 17 - src_len/32): the output limit pointer.
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
// Start at source position 1 with an initial match offset of 1.
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
// Main search loop. DX = current source position s.
// Hashes the bytes at s, s+1 and s+2, fetches table candidates, and first
// checks for a repeat of the current offset at s+1.
search_loop_encodeBlockAsm16K:
// SI = s + 4 + ((s - pending literal start) >> 5): the next position to try if
// nothing matches here, so the step grows as unmatched literals accumulate.
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm16K
// DI = 8 source bytes at s; save the next position in 20(SP).
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
// Hash: shift left by 0x18 (drops the top three bytes, so five bytes are
// hashed), multiply by the constant in R9, keep the top 12 bits (shift right
// 0x34). 4096 entries of 2 bytes fill the 8192-byte table.
MOVQ $0x000000cf1bbcdcbb, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
// R10 = table index for the bytes at s.
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
// R11 = table index for the bytes at s+1.
SHLQ $0x18, R11
IMULQ R9, R11
SHRQ $0x34, R11
// SI, R8 = previous table entries (candidates) for those two indexes.
MOVWLZX (AX)(R10*2), SI
MOVWLZX (AX)(R11*2), R8
// Both slots are written with s (DX).
// NOTE(review): the second slot is indexed by the hash of the bytes at s+1,
// yet it records s rather than s+1, while the later check against candidate R8
// compares with the bytes at s+1. This cannot corrupt output, since every
// candidate is verified before use, but confirm against the generator that it
// is intended.
MOVW DX, (AX)(R10*2)
MOVW DX, (AX)(R11*2)
// R10 = table index for the bytes at s+2.
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
// Repeat check: compare 4 bytes at (s + 1 - offset) with 4 bytes at s+1.
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm16K
// Repeat found at s+1. DI = match start, SI = pending literal start,
// R8 = match start - offset (the earlier data).
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
// Earlier data at position 0: nothing before it to compare.
JZ repeat_extend_back_end_encodeBlockAsm16K
repeat_extend_back_loop_encodeBlockAsm16K:
// Extend backwards while the preceding bytes agree, without going below the
// pending literal start or below position 0 of the earlier data.
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm16K
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm16K
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm16K
repeat_extend_back_end_encodeBlockAsm16K:
// SI = match start - pending literal start = number of literal bytes to emit.
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
// Require room for the literals plus 3 bytes (the largest literal header this
// function writes); otherwise return 0.
LEAQ 3(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
// Emit the SI pending literal bytes that precede the repeat match.
dst_size_check_ok_1:
// R8 = address of the first pending literal byte.
LEAQ (BX)(R8*1), R8
// emitLiteral
// R9 = count - 1. Values below 29 fit in the one-byte header.
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm16K
// R9 = count - 30 for the longer headers.
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm16K
// This JB tests the same flags as the one above and is never taken; execution
// falls through to the three_bytes label directly below, which is the same
// destination, so every remaining count uses the 3-byte header.
JB three_bytes_repeat_emit_lits_encodeBlockAsm16K
three_bytes_repeat_emit_lits_encodeBlockAsm16K:
// 3-byte header: tag 0xf0 followed by 16 bits of R9.
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm16K
two_bytes_repeat_emit_lits_encodeBlockAsm16K:
// 2-byte header: tag 0xe8 followed by 8 bits of R9.
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
// Restore R9 = count - 1 and choose the copy routine by size.
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm16K
JMP memmove_long_repeat_emit_lits_encodeBlockAsm16K
one_byte_repeat_emit_lits_encodeBlockAsm16K:
// 1-byte header: (count - 1) << 3.
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
// R9 = output position after the literals have been copied.
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm16K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm16K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm16K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm16K
memmove_midrepeat_emit_lits_encodeBlockAsm16K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm16K
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm16K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm16K
memmove_long_repeat_emit_lits_encodeBlockAsm16K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm16Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm16K:
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm16K
matchlen_loopback_16_repeat_extend_encodeBlockAsm16K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm16K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm16K
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm16K:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm16K
JMP matchlen_match8_repeat_extend_encodeBlockAsm16K
matchlen_bsf_16repeat_extend_encodeBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm16K
matchlen_match8_repeat_extend_encodeBlockAsm16K:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm16K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm16K
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm16K
matchlen_bsf_8_repeat_extend_encodeBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm16K
matchlen_match4_repeat_extend_encodeBlockAsm16K:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm16K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm16K
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm16K:
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm16K
JB repeat_extend_forward_end_encodeBlockAsm16K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm16K
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm16K
matchlen_match1_repeat_extend_encodeBlockAsm16K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm16K
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm16K:
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitRepeat
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm16K
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm16K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm16K
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm16K
repeat_three_match_repeat_encodeBlockAsm16K:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm16K
repeat_two_match_repeat_encodeBlockAsm16K:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm16K
repeat_one_match_repeat_encodeBlockAsm16K:
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm16K:
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm16K
no_repeat_found_encodeBlockAsm16K:
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm16K
SHRQ $0x08, DI
MOVWLZX (AX)(R10*2), SI
LEAL 2(DX), R9
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm16K
MOVW R9, (AX)(R10*2)
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm16K
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm16K
candidate3_match_encodeBlockAsm16K:
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm16K
candidate2_match_encodeBlockAsm16K:
MOVW R9, (AX)(R10*2)
INCL DX
MOVL R8, SI
candidate_match_encodeBlockAsm16K:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm16K
match_extend_back_loop_encodeBlockAsm16K:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm16K
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm16K
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm16K
JMP match_extend_back_loop_encodeBlockAsm16K
match_extend_back_end_encodeBlockAsm16K:
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
// Record the match. R8 keeps the match start (after backward extension) and
// 16(SP) receives the offset DX - SI.
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// The first four bytes are already known to be equal (4-byte CMPL before
// candidate_match), so forward comparison starts four bytes in.
ADDL $0x04, DX
ADDL $0x04, SI
// DI = src_len - DX: number of bytes left that may still be compared.
MOVQ src_len+32(FP), DI
SUBL DX, DI
// R9 = address of the current data, SI = address of the candidate data.
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
// R11 accumulates the number of equal bytes found at R9 / SI.
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm16K
matchlen_loopback_16_match_nolit_encodeBlockAsm16K:
// Compare 16 bytes per iteration as two 8-byte words. XOR leaves a non-zero
// value exactly when the words differ.
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm16K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm16K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm16K:
// Keep using the 16-byte loop while at least 16 bytes remain.
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm16K
JMP matchlen_match8_match_nolit_encodeBlockAsm16K
matchlen_bsf_16match_nolit_encodeBlockAsm16K:
// Mismatch in the second word: the index of the lowest set bit of the XOR,
// divided by 8, is the number of equal leading bytes in that word.
// Add it plus the 8 bytes of the first word to R11.
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm16K
matchlen_match8_match_nolit_encodeBlockAsm16K:
// Fewer than 16 bytes remain: try a single 8-byte comparison.
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm16K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm16K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm16K
matchlen_bsf_8_match_nolit_encodeBlockAsm16K:
// Mismatch inside an 8-byte word: add the count of equal leading bytes
// (lowest set bit index / 8) to R11.
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm16K
matchlen_match4_match_nolit_encodeBlockAsm16K:
// Try 4 bytes if at least 4 remain. On a mismatch fall to the 2-byte step
// without consuming anything.
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm16K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm16K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm16K:
// Exactly 1 byte left: go to the 1-byte step. 0 bytes left: done.
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm16K
JB match_nolit_end_encodeBlockAsm16K
// At least 2 bytes left: compare a 16-bit word. On a mismatch the 1-byte step
// still checks the first of the two bytes.
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm16K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm16K
matchlen_match1_match_nolit_encodeBlockAsm16K:
// Final single-byte comparison.
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm16K
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm16K:
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm16K
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm16K
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm16K
MOVL (DI), DI
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm16K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm16K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm16K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm16K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_three_match_emit_repeat_copy2_encodeBlockAsm16K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_two_match_emit_repeat_copy2_encodeBlockAsm16K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_one_match_emit_repeat_copy2_encodeBlockAsm16K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
match_emit_lits_copy_encodeBlockAsm16K:
LEAQ 3(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm16K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm16K
JB three_bytes_match_emit_encodeBlockAsm16K
three_bytes_match_emit_encodeBlockAsm16K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm16K
two_bytes_match_emit_encodeBlockAsm16K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm16K
JMP memmove_long_match_emit_encodeBlockAsm16K
one_byte_match_emit_encodeBlockAsm16K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm16K
emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm16K
emit_lit_memmove_match_emit_encodeBlockAsm16K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm16K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm16K
memmove_midmatch_emit_encodeBlockAsm16K:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm16K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm16K
emit_lit_memmove_mid_match_emit_encodeBlockAsm16K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm16K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm16K
memmove_long_match_emit_encodeBlockAsm16K:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm16Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
// Emit a copy with no pending literals.
// On entry: SI = match offset, R11 = match length (the initial four bytes are
// included), CX = output write pointer. Every path advances CX past the bytes
// it keeps and ends at match_nolit_emitcopy_end.
match_nolits_copy_encodeBlockAsm16K:
// emitCopy
// Offsets above 1024 use the emitCopy2 encoding further down.
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm16K
// Lengths of 19 or more need an extra length byte or a following repeat.
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm16K
// Two-byte form: 16-bit value ((offset-1) << 6) + length*4 - 15,
// i.e. ((offset-1) << 6) | ((length-4) << 2) | 1.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
emit_one_longer_match_nolit_encodeBlockAsm16K:
// Lengths of 0x112 (274) or more are split into a copy plus a repeat.
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm16K
// Three-byte form: 16-bit value ((offset-1) << 6) + 61 (length field 15),
// followed by one byte holding length - 18.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
emit_copy1_repeat_match_nolit_encodeBlockAsm16K:
// Emit a two-byte copy with value ((offset-1) << 6) + 57 (length field 14),
// subtract 18 from the length and encode the rest as a repeat.
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
// R11 = remaining length. R11 was at least 0x112 before the subtraction, so
// it is at least 0x100 here and the one-byte form below cannot be selected on
// this path.
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K
// Longer forms store length - 30 after the tag byte.
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K
// Tag 0xfc. MOVL stores four bytes after the tag but CX advances by four in
// total, so only the low three bytes of the value stay in the output.
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K:
// Tag 0xf4 followed by a 16-bit value.
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K:
// Tag 0xec followed by an 8-bit value.
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm16K:
// One-byte form: R11*8 - 4.
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
two_byte_match_nolit_encodeBlockAsm16K:
// emitCopy2
// Offset - 64 is stored as 16 bits at bytes 1..2. R11 becomes length - 4.
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
// length - 4 up to 60 fits in the tag byte itself.
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm16K_emit2
// Otherwise (length - 4) - 60 is stored after the offset.
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm16K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm16K_emit2
// Tag 0xfe. MOVL stores four bytes at offset 3 but CX advances by six in
// total, so only three of them stay in the output.
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
emit_copy2_2_match_nolit_encodeBlockAsm16K_emit2:
// Tag 0xfa with a 16-bit extra length: 5 bytes in total.
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
emit_copy2_1_match_nolit_encodeBlockAsm16K_emit2:
// Tag 0xf6 with an 8-bit extra length: 4 bytes in total.
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm16K
emit_copy2_0_match_nolit_encodeBlockAsm16K_emit2:
// Tag = 2 + (length - 4) * 4: 3 bytes in total. Falls through to
// match_nolit_emitcopy_end.
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
// A copy has just been emitted and DX is the position right after the match.
// Update the hash table for positions DX-2 and DX, then check at once whether
// another match starts at DX.
match_nolit_emitcopy_end_encodeBlockAsm16K:
// Past the position limit stored at 8(SP): only the remainder is left.
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm16K
// DI = 8 bytes starting at DX-2.
MOVQ -2(BX)(DX*1), DI
// Output space check against the limit at (SP). Return 0 when exhausted.
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm16K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm16K:
// Hash: shifting left by 24 drops the top three bytes, so five input bytes
// contribute. Multiply by the constant in SI and keep the top 12 bits
// (index 0..4095).
// R8 = hash of the bytes at DX-2, R9 = hash of the bytes at DX.
// After the SHRQ $0x10, DI holds the bytes starting at DX.
MOVQ $0x000000cf1bbcdcbb, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x18, R8
IMULQ SI, R8
SHRQ $0x34, R8
SHLQ $0x18, R9
IMULQ SI, R9
SHRQ $0x34, R9
LEAL -2(DX), R10
// Table entries are 16 bits wide. Read the previous entry for the hash of DX
// as the candidate (SI), then store DX-2 and DX in their slots.
MOVWLZX (AX)(R9*2), SI
MOVW R10, (AX)(R8*2)
MOVW DX, (AX)(R9*2)
// R8 keeps the probed position. DX is advanced by one before the branch, so
// the search loop resumes one byte further on when the probe fails.
MOVL DX, R8
INCL DX
// Compare 4 bytes at the candidate with the 4 bytes at the probed position.
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm16K
// Match: store the offset (position - candidate) at 16(SP).
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Output space check again before emitting. Return 0 when exhausted.
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm16K
matchlen_loopback_16_match_nolit2_encodeBlockAsm16K:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm16K
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm16K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm16K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm16K
JMP matchlen_match8_match_nolit2_encodeBlockAsm16K
matchlen_bsf_16match_nolit2_encodeBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm16K
matchlen_match8_match_nolit2_encodeBlockAsm16K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm16K
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm16K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm16K
matchlen_bsf_8_match_nolit2_encodeBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm16K
matchlen_match4_match_nolit2_encodeBlockAsm16K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm16K
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm16K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm16K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm16K
JB match_nolit2_end_encodeBlockAsm16K
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm16K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm16K
matchlen_match1_match_nolit2_encodeBlockAsm16K:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm16K
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm16K:
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm16K
// Emit everything from the position stored at 12(SP) to the end of src as one
// literal run, then return the number of bytes written to dst.
emit_remainder_encodeBlockAsm16K:
// AX = src_len - 12(SP): number of trailing literal bytes. Nothing to do if 0.
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm16K
// DX = address of the first literal byte. BX (no longer needed as a base) is
// reused as scratch from here on.
LEAQ (BX)(DX*1), DX
// Output space check: CX + length + 3 must stay below the limit at (SP).
// 3 is the largest header written below. Return 0 otherwise.
LEAQ 3(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
// Header size is chosen from length - 1: below 29 uses one byte, otherwise
// length - 30 is stored in one or two extra bytes.
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm16K
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm16K
// This second JB repeats the condition of the one above and is therefore never
// taken. Execution falls through to the same label.
JB three_bytes_emit_remainder_encodeBlockAsm16K
three_bytes_emit_remainder_encodeBlockAsm16K:
// Tag 0xf0 (= 30 << 3) followed by length - 30 as 16 bits.
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm16K
two_bytes_emit_remainder_encodeBlockAsm16K:
// Tag 0xe8 (= 29 << 3) followed by length - 30 as 8 bits.
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
// BX is length - 1 again: up to 64 bytes use the mid copy, more use the long
// copy.
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm16K
JMP memmove_long_emit_remainder_encodeBlockAsm16K
one_byte_emit_remainder_encodeBlockAsm16K:
// Tag = (length - 1) << 3.
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
// BX = output pointer after the literals. It is moved into CX once the copy
// is done.
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
// Copy AX bytes from DX to CX. Each size class loads everything before
// storing and covers the range with a head and a tail access that may overlap,
// so nothing outside the AX bytes is read or written.
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_1or2:
// First byte and last byte (the same byte when AX is 1).
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_3:
// One 16-bit word plus the third byte.
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_4through8:
// First and last 4 bytes.
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_8through16:
// First and last 8 bytes.
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_17through32:
// First and last 16 bytes.
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBlockAsm16K_memmove_move_33through64:
// First and last 32 bytes.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm16K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm16K
memmove_midemit_remainder_encodeBlockAsm16K:
// Reached from the two-byte header with length between 30 and 64.
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm16K_memmove_move_17through32:
// First and last 16 bytes.
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm16K
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm16K_memmove_move_33through64:
// First and last 32 bytes.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm16K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm16K
memmove_long_emit_remainder_encodeBlockAsm16K:
// Reached with length of 65 or more.
LEAQ (CX)(AX*1), BX
// genMemMoveLong
// Load the first and last 32 bytes up front. They are stored last, with
// unaligned stores, and cover whatever the aligned loops below leave out.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
// DI = length / 32. R8 = 64 - (CX & 31), which makes CX + R8 - 32 a 32-byte
// aligned address, as the aligned MOVOA stores below need.
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): JA and JNA below test CF as well as ZF, and DECQ leaves CF
// unchanged, so these branches also depend on the preceding SUBQ/ADDQ. The
// loop-entry logic is generator output. Confirm against the generator before
// changing it.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm16Klarge_big_loop_back:
// Copy 32 bytes from SI to the aligned address R9 and advance SI, R9 and R8.
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm16Klarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R8 and repeat while AX >= R8.
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm16Klarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm16K:
// Result = CX - dst_base: the number of bytes written to dst.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm4K(dst []byte, src []byte, tmp *[2048]byte) int
// Requires: BMI, CMOV, SSE2
//
// Prologue of the 4K variant. tmp is used as a table of 16-bit entries.
// Registers set up here: AX = tmp, CX = dst write pointer, BX = src base,
// DX = current position in src.
// Stack slots set up here:
//   (SP)   8 bytes  output limit pointer
//   8(SP)  4 bytes  src_len - 17, the position limit checked by the search loop
//   12(SP) 4 bytes  starts at 0 (used later as the lower bound for matches)
//   16(SP) 4 bytes  starts at 1 (used later as the repeat offset)
// The paths visible in this excerpt return 0 when the output limit is hit.
// NOTE(review): the success return is outside this excerpt; presumably the
// number of bytes written, as in the 16K variant. Confirm.
TEXT ·encodeBlockAsm4K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Zero tmp: 16 iterations of 128 bytes = 2048 bytes.
MOVQ $0x00000010, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm4K:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm4K
MOVL $0x00000000, 12(SP)
// 8(SP) = src_len - 17.
// (SP) = dst_base + (src_len - 17) - (src_len >> 5), the output limit.
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
// Start searching at position 1 with an initial value of 1 in 16(SP).
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
// Main search loop. DX = current position, BX = src base, AX = table of
// 16-bit entries, 12(SP) = lower bound of unemitted data, 16(SP) = last offset.
search_loop_encodeBlockAsm4K:
// Next position to try = DX + 4 + ((DX - 12(SP)) >> 5): the step grows with
// the distance since the last emit. Leave the loop when that next position
// reaches the limit in 8(SP).
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm4K
// DI = 8 bytes at the current position. The next position is parked in 20(SP).
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
// Hash: shifting left by 32 drops the upper four bytes, so four input bytes
// contribute. Multiply by the constant in R9 and keep the top 10 bits
// (index 0..1023).
// R10 = hash of the bytes at DX, R11 = hash of the bytes at DX+1.
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x36, R11
// Read the previous entries as candidates (SI for DX, R8 for DX+1), then
// overwrite both slots with the low 16 bits of DX.
// NOTE(review): the second slot is indexed by the hash of the bytes at DX+1
// yet receives DX itself rather than DX+1. Confirm against the generator that
// this is intended.
MOVWLZX (AX)(R10*2), SI
MOVWLZX (AX)(R11*2), R8
MOVW DX, (AX)(R10*2)
MOVW DX, (AX)(R11*2)
// R10 = hash of the bytes at DX+2, used when no repeat is found.
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
// Repeat probe: compare the 4 bytes at DX+1 with the 4 bytes at
// DX+1 - 16(SP). If they differ, go and check the table candidates.
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm4K
// A repeat match was found at DX+1. DI = match start (DX+1),
// SI = lower bound from 12(SP), R8 = source position of the match
// (DI - 16(SP)). Extend the match backwards while the preceding bytes are
// equal.
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
// Source position 0: there is no preceding byte to compare.
JZ repeat_extend_back_end_encodeBlockAsm4K
repeat_extend_back_loop_encodeBlockAsm4K:
// Stop once the match start has reached the lower bound (unsigned compare).
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm4K
// Compare the byte before the source position with the byte before DI.
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm4K
// Equal: move both back by one and continue unless the source hit position 0.
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm4K
repeat_extend_back_end_encodeBlockAsm4K:
// SI = DI - 12(SP): number of literal bytes pending before the match.
// R8 = 12(SP): position of the first of them.
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
// Output space check: CX + literal count + 3 must stay below the limit at
// (SP). Return 0 otherwise.
LEAQ 3(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
LEAQ (BX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm4K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm4K
JB three_bytes_repeat_emit_lits_encodeBlockAsm4K
three_bytes_repeat_emit_lits_encodeBlockAsm4K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm4K
two_bytes_repeat_emit_lits_encodeBlockAsm4K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm4K
JMP memmove_long_repeat_emit_lits_encodeBlockAsm4K
one_byte_repeat_emit_lits_encodeBlockAsm4K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm4K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm4K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm4K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm4K
memmove_midrepeat_emit_lits_encodeBlockAsm4K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm4K
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm4K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm4K
memmove_long_repeat_emit_lits_encodeBlockAsm4K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm4Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm4K:
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm4K
matchlen_loopback_16_repeat_extend_encodeBlockAsm4K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4K
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm4K:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm4K
JMP matchlen_match8_repeat_extend_encodeBlockAsm4K
matchlen_bsf_16repeat_extend_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm4K
matchlen_match8_repeat_extend_encodeBlockAsm4K:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm4K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4K
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm4K
matchlen_bsf_8_repeat_extend_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm4K
matchlen_match4_repeat_extend_encodeBlockAsm4K:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm4K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm4K
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm4K:
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm4K
JB repeat_extend_forward_end_encodeBlockAsm4K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm4K
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm4K
matchlen_match1_repeat_extend_encodeBlockAsm4K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm4K
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm4K:
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitRepeat
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm4K
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm4K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm4K
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm4K
repeat_three_match_repeat_encodeBlockAsm4K:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm4K
repeat_two_match_repeat_encodeBlockAsm4K:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4K
repeat_one_match_repeat_encodeBlockAsm4K:
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm4K:
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm4K
// No repeat match. Check the table candidates loaded by the search loop:
// DI = 8 bytes at DX, SI = candidate read for the hash of DX, R8 = candidate
// read for the hash of DX+1, R10 = table index for the hash of DX+2.
// Each check compares 4 bytes at the candidate with the low 4 bytes of DI.
no_repeat_found_encodeBlockAsm4K:
// Candidate 1 against the bytes at DX.
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm4K
// Shift so DI starts at DX+1. Fetch the third candidate (SI) from the DX+2
// slot and prepare R9 = DX+2 as the value to store there.
SHRQ $0x08, DI
MOVWLZX (AX)(R10*2), SI
LEAL 2(DX), R9
// Candidate 2 against the bytes at DX+1.
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm4K
MOVW R9, (AX)(R10*2)
// Shift again so DI starts at DX+2 and test candidate 3.
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm4K
// Nothing matched: continue at the next position saved in 20(SP).
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm4K
candidate3_match_encodeBlockAsm4K:
// Match found at DX+2 with the candidate already in SI.
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm4K
candidate2_match_encodeBlockAsm4K:
// Match found at DX+1: still record DX+2 in its slot, then make R8 the
// candidate and fall through to candidate_match.
MOVW R9, (AX)(R10*2)
INCL DX
MOVL R8, SI
candidate_match_encodeBlockAsm4K:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm4K
match_extend_back_loop_encodeBlockAsm4K:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm4K
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm4K
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm4K
JMP match_extend_back_loop_encodeBlockAsm4K
match_extend_back_end_encodeBlockAsm4K:
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm4K
matchlen_loopback_16_match_nolit_encodeBlockAsm4K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm4K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm4K
JMP matchlen_match8_match_nolit_encodeBlockAsm4K
matchlen_bsf_16match_nolit_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm4K
matchlen_match8_match_nolit_encodeBlockAsm4K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm4K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm4K
matchlen_bsf_8_match_nolit_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm4K
matchlen_match4_match_nolit_encodeBlockAsm4K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm4K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm4K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm4K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm4K
JB match_nolit_end_encodeBlockAsm4K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm4K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm4K
matchlen_match1_match_nolit_encodeBlockAsm4K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm4K
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm4K:
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm4K
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm4K
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm4K
MOVL (DI), DI
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm4K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm4K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm4K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm4K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_three_match_emit_repeat_copy2_encodeBlockAsm4K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_two_match_emit_repeat_copy2_encodeBlockAsm4K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_one_match_emit_repeat_copy2_encodeBlockAsm4K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
match_emit_lits_copy_encodeBlockAsm4K:
LEAQ 3(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm4K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm4K
JB three_bytes_match_emit_encodeBlockAsm4K
three_bytes_match_emit_encodeBlockAsm4K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm4K
two_bytes_match_emit_encodeBlockAsm4K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm4K
JMP memmove_long_match_emit_encodeBlockAsm4K
one_byte_match_emit_encodeBlockAsm4K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm4K
emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm4K
emit_lit_memmove_match_emit_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm4K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm4K
memmove_midmatch_emit_encodeBlockAsm4K:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm4K
emit_lit_memmove_mid_match_emit_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm4K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm4K
memmove_long_match_emit_encodeBlockAsm4K:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm4Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
match_nolits_copy_encodeBlockAsm4K:
// emitCopy
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm4K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm4K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
emit_one_longer_match_nolit_encodeBlockAsm4K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm4K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
emit_copy1_repeat_match_nolit_encodeBlockAsm4K:
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm4K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
two_byte_match_nolit_encodeBlockAsm4K:
// emitCopy2
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm4K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm4K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm4K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
emit_copy2_2_match_nolit_encodeBlockAsm4K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
emit_copy2_1_match_nolit_encodeBlockAsm4K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4K
emit_copy2_0_match_nolit_encodeBlockAsm4K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm4K:
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm4K
MOVQ -2(BX)(DX*1), DI
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm4K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm4K:
MOVQ $0x9e3779b1, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x20, R8
IMULQ SI, R8
SHRQ $0x36, R8
SHLQ $0x20, R9
IMULQ SI, R9
SHRQ $0x36, R9
LEAL -2(DX), R10
MOVWLZX (AX)(R9*2), SI
MOVW R10, (AX)(R8*2)
MOVW DX, (AX)(R9*2)
MOVL DX, R8
INCL DX
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm4K
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm4K
matchlen_loopback_16_match_nolit2_encodeBlockAsm4K:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm4K
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm4K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm4K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm4K
JMP matchlen_match8_match_nolit2_encodeBlockAsm4K
matchlen_bsf_16match_nolit2_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm4K
matchlen_match8_match_nolit2_encodeBlockAsm4K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm4K
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm4K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm4K
matchlen_bsf_8_match_nolit2_encodeBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm4K
matchlen_match4_match_nolit2_encodeBlockAsm4K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm4K
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm4K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm4K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm4K
JB match_nolit2_end_encodeBlockAsm4K
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm4K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm4K
matchlen_match1_match_nolit2_encodeBlockAsm4K:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm4K
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm4K:
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm4K
emit_remainder_encodeBlockAsm4K:
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm4K
LEAQ (BX)(DX*1), DX
LEAQ 3(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm4K
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm4K
JB three_bytes_emit_remainder_encodeBlockAsm4K
three_bytes_emit_remainder_encodeBlockAsm4K:
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm4K
two_bytes_emit_remainder_encodeBlockAsm4K:
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm4K
JMP memmove_long_emit_remainder_encodeBlockAsm4K
one_byte_emit_remainder_encodeBlockAsm4K:
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_1or2:
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_4through8:
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_8through16:
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm4K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm4K
memmove_midemit_remainder_encodeBlockAsm4K:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm4K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm4K
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm4K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm4K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm4K
memmove_long_emit_remainder_encodeBlockAsm4K:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm4K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBlockAsm1K(dst []byte, src []byte, tmp *[1024]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBlockAsm1K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00000008, DX
MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm1K:
MOVOU X0, (BX)
MOVOU X0, 16(BX)
MOVOU X0, 32(BX)
MOVOU X0, 48(BX)
MOVOU X0, 64(BX)
MOVOU X0, 80(BX)
MOVOU X0, 96(BX)
MOVOU X0, 112(BX)
ADDQ $0x80, BX
DECQ DX
JNZ zero_loop_encodeBlockAsm1K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), DX
LEAQ -17(DX), BX
LEAQ -17(DX), SI
MOVL SI, 8(SP)
SHRQ $0x05, DX
SUBL DX, BX
LEAQ (CX)(BX*1), BX
MOVQ BX, (SP)
MOVL $0x00000001, DX
MOVL DX, 16(SP)
MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm1K:
MOVL DX, SI
SUBL 12(SP), SI
SHRL $0x04, SI
LEAL 4(DX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm1K
MOVQ (BX)(DX*1), DI
MOVL SI, 20(SP)
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x37, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x37, R11
MOVWLZX (AX)(R10*2), SI
MOVWLZX (AX)(R11*2), R8
MOVW DX, (AX)(R10*2)
MOVW DX, (AX)(R11*2)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x37, R10
MOVL DX, R9
SUBL 16(SP), R9
MOVL 1(BX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm1K
LEAL 1(DX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeBlockAsm1K
repeat_extend_back_loop_encodeBlockAsm1K:
CMPL DI, SI
JBE repeat_extend_back_end_encodeBlockAsm1K
MOVB -1(BX)(R8*1), R9
MOVB -1(BX)(DI*1), R10
CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm1K
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeBlockAsm1K
repeat_extend_back_end_encodeBlockAsm1K:
MOVL DI, SI
MOVL 12(SP), R8
SUBL R8, SI
LEAQ 3(CX)(SI*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_1
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_1:
LEAQ (BX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_repeat_emit_lits_encodeBlockAsm1K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_repeat_emit_lits_encodeBlockAsm1K
JB three_bytes_repeat_emit_lits_encodeBlockAsm1K
three_bytes_repeat_emit_lits_encodeBlockAsm1K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_repeat_emit_lits_encodeBlockAsm1K
two_bytes_repeat_emit_lits_encodeBlockAsm1K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midrepeat_emit_lits_encodeBlockAsm1K
JMP memmove_long_repeat_emit_lits_encodeBlockAsm1K
one_byte_repeat_emit_lits_encodeBlockAsm1K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm1K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_repeat_emit_lits_encodeBlockAsm1K
emit_lit_memmove_repeat_emit_lits_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_repeat_emit_lits_encodeBlockAsm1K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm1K
memmove_midrepeat_emit_lits_encodeBlockAsm1K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm1K
emit_lit_memmove_mid_repeat_emit_lits_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_repeat_emit_lits_encodeBlockAsm1K:
MOVQ R9, CX
JMP repeat_emit_lits_end_encodeBlockAsm1K
memmove_long_repeat_emit_lits_encodeBlockAsm1K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R11
SHRQ $0x05, R11
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R10
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm1Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ SI, R12
JAE emit_lit_memmove_long_repeat_emit_lits_encodeBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
repeat_emit_lits_end_encodeBlockAsm1K:
ADDL $0x05, DX
MOVL DX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL DX, R8
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_repeat_extend_encodeBlockAsm1K
matchlen_loopback_16_repeat_extend_encodeBlockAsm1K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm1K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm1K
LEAL -16(R8), R8
LEAL 16(R11), R11
matchlen_loop_16_entry_repeat_extend_encodeBlockAsm1K:
CMPL R8, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBlockAsm1K
JMP matchlen_match8_repeat_extend_encodeBlockAsm1K
matchlen_bsf_16repeat_extend_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm1K
matchlen_match8_repeat_extend_encodeBlockAsm1K:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm1K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm1K
LEAL -8(R8), R8
LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeBlockAsm1K
matchlen_bsf_8_repeat_extend_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm1K
matchlen_match4_repeat_extend_encodeBlockAsm1K:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm1K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeBlockAsm1K
LEAL -4(R8), R8
LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeBlockAsm1K:
CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm1K
JB repeat_extend_forward_end_encodeBlockAsm1K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeBlockAsm1K
LEAL 2(R11), R11
SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeBlockAsm1K
matchlen_match1_repeat_extend_encodeBlockAsm1K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeBlockAsm1K
LEAL 1(R11), R11
repeat_extend_forward_end_encodeBlockAsm1K:
ADDL R11, DX
MOVL DX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitRepeat
LEAL -1(SI), DI
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBlockAsm1K
LEAL -30(SI), DI
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBlockAsm1K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBlockAsm1K
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm1K
repeat_three_match_repeat_encodeBlockAsm1K:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm1K
repeat_two_match_repeat_encodeBlockAsm1K:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm1K
repeat_one_match_repeat_encodeBlockAsm1K:
XORL DI, DI
LEAL -4(DI)(SI*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBlockAsm1K:
MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm1K
no_repeat_found_encodeBlockAsm1K:
CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm1K
SHRQ $0x08, DI
MOVWLZX (AX)(R10*2), SI
LEAL 2(DX), R9
CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm1K
MOVW R9, (AX)(R10*2)
SHRQ $0x08, DI
CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm1K
MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm1K
candidate3_match_encodeBlockAsm1K:
ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm1K
candidate2_match_encodeBlockAsm1K:
MOVW R9, (AX)(R10*2)
INCL DX
MOVL R8, SI
candidate_match_encodeBlockAsm1K:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm1K
match_extend_back_loop_encodeBlockAsm1K:
CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm1K
MOVB -1(BX)(SI*1), R8
MOVB -1(BX)(DX*1), R9
CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm1K
LEAL -1(DX), DX
DECL SI
JZ match_extend_back_end_encodeBlockAsm1K
JMP match_extend_back_loop_encodeBlockAsm1K
match_extend_back_end_encodeBlockAsm1K:
CMPQ CX, (SP)
JB dst_size_check_ok_2
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_2:
MOVL DX, R8
MOVL DX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R9
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBlockAsm1K
matchlen_loopback_16_match_nolit_encodeBlockAsm1K:
MOVQ (R9)(R11*1), R10
MOVQ 8(R9)(R11*1), R12
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm1K
XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm1K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBlockAsm1K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBlockAsm1K
JMP matchlen_match8_match_nolit_encodeBlockAsm1K
matchlen_bsf_16match_nolit_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBlockAsm1K
matchlen_match8_match_nolit_encodeBlockAsm1K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm1K
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm1K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBlockAsm1K
matchlen_bsf_8_match_nolit_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBlockAsm1K
matchlen_match4_match_nolit_encodeBlockAsm1K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm1K
MOVL (R9)(R11*1), R10
CMPL (SI)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBlockAsm1K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBlockAsm1K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm1K
JB match_nolit_end_encodeBlockAsm1K
MOVW (R9)(R11*1), R10
CMPW (SI)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBlockAsm1K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm1K
matchlen_match1_match_nolit_encodeBlockAsm1K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE match_nolit_end_encodeBlockAsm1K
LEAL 1(R11), R11
match_nolit_end_encodeBlockAsm1K:
ADDL R11, DX
ADDL $0x04, R11
MOVL 16(SP), SI
MOVL 12(SP), DI
MOVL DX, 12(SP)
SUBL DI, R8
JZ match_nolits_copy_encodeBlockAsm1K
LEAQ (BX)(DI*1), DI
CMPL R8, $0x03
JA match_emit_lits_copy_encodeBlockAsm1K
CMPL SI, $0x40
JB match_emit_lits_copy_encodeBlockAsm1K
MOVL (DI), DI
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, SI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, SI
CMOVLLT R11, SI
LEAL -1(R8)(SI*4), SI
MOVL $0x00000003, R10
LEAL (R10)(SI*8), SI
MOVB SI, (CX)
ADDQ $0x03, CX
MOVL DI, (CX)
ADDQ R8, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBlockAsm1K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBlockAsm1K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBlockAsm1K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBlockAsm1K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_three_match_emit_repeat_copy2_encodeBlockAsm1K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_two_match_emit_repeat_copy2_encodeBlockAsm1K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_one_match_emit_repeat_copy2_encodeBlockAsm1K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
match_emit_lits_copy_encodeBlockAsm1K:
LEAQ 3(CX)(R8*1), R9
CMPQ R9, (SP)
JB dst_size_check_ok_3
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_3:
// emitLiteral
LEAL -1(R8), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBlockAsm1K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm1K
JB three_bytes_match_emit_encodeBlockAsm1K
three_bytes_match_emit_encodeBlockAsm1K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBlockAsm1K
two_bytes_match_emit_encodeBlockAsm1K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBlockAsm1K
JMP memmove_long_match_emit_encodeBlockAsm1K
one_byte_match_emit_encodeBlockAsm1K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_8through16:
MOVOU (DI), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm1K
emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm1K
emit_lit_memmove_match_emit_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBlockAsm1K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm1K
memmove_midmatch_emit_encodeBlockAsm1K:
LEAQ (CX)(R8*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBlockAsm1K
emit_lit_memmove_mid_match_emit_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBlockAsm1K:
MOVQ R9, CX
JMP match_nolits_copy_encodeBlockAsm1K
memmove_long_match_emit_encodeBlockAsm1K:
LEAQ (CX)(R8*1), R9
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R8*1), X2
MOVOU -16(DI)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(DI)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBlockAsm1Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R13*1), X4
MOVOU -16(DI)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R9, CX
match_nolits_copy_encodeBlockAsm1K:
// emitCopy
CMPL SI, $0x00000400
JA two_byte_match_nolit_encodeBlockAsm1K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBlockAsm1K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
emit_one_longer_match_nolit_encodeBlockAsm1K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBlockAsm1K
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
emit_copy1_repeat_match_nolit_encodeBlockAsm1K:
LEAL -1(SI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBlockAsm1K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
two_byte_match_nolit_encodeBlockAsm1K:
// emitCopy2
LEAL -64(SI), SI
LEAL -4(R11), R11
MOVW SI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBlockAsm1K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBlockAsm1K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBlockAsm1K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
emit_copy2_2_match_nolit_encodeBlockAsm1K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
emit_copy2_1_match_nolit_encodeBlockAsm1K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm1K
emit_copy2_0_match_nolit_encodeBlockAsm1K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm1K:
CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm1K
MOVQ -2(BX)(DX*1), DI
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm1K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm1K:
MOVQ $0x9e3779b1, SI
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, R9
SHLQ $0x20, R8
IMULQ SI, R8
SHRQ $0x37, R8
SHLQ $0x20, R9
IMULQ SI, R9
SHRQ $0x37, R9
LEAL -2(DX), R10
MOVWLZX (AX)(R9*2), SI
MOVW R10, (AX)(R8*2)
MOVW DX, (AX)(R9*2)
MOVL DX, R8
INCL DX
CMPL (BX)(SI*1), DI
JNE search_loop_encodeBlockAsm1K
MOVL R8, DI
SUBL SI, DI
MOVL DI, 16(SP)
CMPQ CX, (SP)
JB dst_size_check_ok_4
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_4:
ADDL $0x03, DX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL DX, DI
LEAQ (BX)(DX*1), R8
LEAQ (BX)(SI*1), SI
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit2_encodeBlockAsm1K
matchlen_loopback_16_match_nolit2_encodeBlockAsm1K:
MOVQ (R8)(R11*1), R9
MOVQ 8(R8)(R11*1), R10
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm1K
XORQ 8(SI)(R11*1), R10
JNZ matchlen_bsf_16match_nolit2_encodeBlockAsm1K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit2_encodeBlockAsm1K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit2_encodeBlockAsm1K
JMP matchlen_match8_match_nolit2_encodeBlockAsm1K
matchlen_bsf_16match_nolit2_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL 8(R11)(R10*1), R11
JMP match_nolit2_end_encodeBlockAsm1K
matchlen_match8_match_nolit2_encodeBlockAsm1K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit2_encodeBlockAsm1K
MOVQ (R8)(R11*1), R9
XORQ (SI)(R11*1), R9
JNZ matchlen_bsf_8_match_nolit2_encodeBlockAsm1K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit2_encodeBlockAsm1K
matchlen_bsf_8_match_nolit2_encodeBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R11)(R9*1), R11
JMP match_nolit2_end_encodeBlockAsm1K
matchlen_match4_match_nolit2_encodeBlockAsm1K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit2_encodeBlockAsm1K
MOVL (R8)(R11*1), R9
CMPL (SI)(R11*1), R9
JNE matchlen_match2_match_nolit2_encodeBlockAsm1K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit2_encodeBlockAsm1K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit2_encodeBlockAsm1K
JB match_nolit2_end_encodeBlockAsm1K
MOVW (R8)(R11*1), R9
CMPW (SI)(R11*1), R9
JNE matchlen_match1_match_nolit2_encodeBlockAsm1K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit2_end_encodeBlockAsm1K
matchlen_match1_match_nolit2_encodeBlockAsm1K:
MOVB (R8)(R11*1), R9
CMPB (SI)(R11*1), R9
JNE match_nolit2_end_encodeBlockAsm1K
LEAL 1(R11), R11
match_nolit2_end_encodeBlockAsm1K:
ADDL R11, DX
ADDL $0x04, R11
MOVL DX, 12(SP)
MOVL 16(SP), SI
JMP match_nolits_copy_encodeBlockAsm1K
emit_remainder_encodeBlockAsm1K:
MOVQ src_len+32(FP), AX
MOVL 12(SP), DX
SUBL DX, AX
JZ emit_remainder_end_encodeBlockAsm1K
LEAQ (BX)(DX*1), DX
LEAQ 3(CX)(AX*1), BX
CMPQ BX, (SP)
JB dst_size_check_ok_5
MOVQ $0x00000000, ret+56(FP)
RET
dst_size_check_ok_5:
// emitLiteral
LEAL -1(AX), BX
CMPL BX, $0x1d
JB one_byte_emit_remainder_encodeBlockAsm1K
SUBL $0x1d, BX
CMPL BX, $0x00000100
JB two_bytes_emit_remainder_encodeBlockAsm1K
JB three_bytes_emit_remainder_encodeBlockAsm1K
three_bytes_emit_remainder_encodeBlockAsm1K:
MOVB $0xf0, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, BX
JMP memmove_long_emit_remainder_encodeBlockAsm1K
two_bytes_emit_remainder_encodeBlockAsm1K:
MOVB $0xe8, (CX)
MOVB BL, 1(CX)
ADDL $0x1d, BX
ADDQ $0x02, CX
CMPL BX, $0x40
JB memmove_midemit_remainder_encodeBlockAsm1K
JMP memmove_long_emit_remainder_encodeBlockAsm1K
one_byte_emit_remainder_encodeBlockAsm1K:
SHLB $0x03, BL
MOVB BL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ AX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_3
CMPQ AX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_4through8
CMPQ AX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_8through16
CMPQ AX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_1or2:
MOVB (DX), SI
MOVB -1(DX)(AX*1), DL
MOVB SI, (CX)
MOVB DL, -1(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_3:
MOVW (DX), SI
MOVB 2(DX), DL
MOVW SI, (CX)
MOVB DL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_4through8:
MOVL (DX), SI
MOVL -4(DX)(AX*1), DX
MOVL SI, (CX)
MOVL DX, -4(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_8through16:
MOVQ (DX), SI
MOVQ -8(DX)(AX*1), DX
MOVQ SI, (CX)
MOVQ DX, -8(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm1K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm1K
memmove_midemit_remainder_encodeBlockAsm1K:
LEAQ (CX)(AX*1), BX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ AX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm1K_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(AX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(AX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBlockAsm1K
emit_lit_memmove_mid_emit_remainder_encodeBlockAsm1K_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
memmove_mid_end_copy_emit_remainder_encodeBlockAsm1K:
MOVQ BX, CX
JMP emit_remainder_end_encodeBlockAsm1K
memmove_long_emit_remainder_encodeBlockAsm1K:
LEAQ (CX)(AX*1), BX
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(AX*1), X2
MOVOU -16(DX)(AX*1), X3
MOVQ AX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(DX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm1Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(DX)(R8*1), X4
MOVOU -16(DX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ AX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(AX*1)
MOVOU X3, -16(CX)(AX*1)
MOVQ BX, CX
emit_remainder_end_encodeBlockAsm1K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBetterBlockAsm(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00001200, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
LEAQ -17(AX), DX
LEAQ -17(AX), DI
MOVL DI, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm:
MOVQ tmp+48(FP), DI
MOVL AX, R8
SUBL 12(SP), R8
SHRL $0x08, R8
CMPL R8, $0x63
JBE check_maxskip_ok_encodeBetterBlockAsm
LEAL 100(AX), R8
JMP check_maxskip_cont_encodeBetterBlockAsm
check_maxskip_ok_encodeBetterBlockAsm:
LEAL 1(AX)(R8*1), R8
check_maxskip_cont_encodeBetterBlockAsm:
CMPL R8, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm
MOVQ (DX)(AX*1), R9
MOVL R8, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R11
MOVQ $0x9e3779b1, R8
MOVQ R9, R12
MOVQ R9, R13
SHLQ $0x08, R12
IMULQ R11, R12
SHRQ $0x2f, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x32, R13
MOVL (DI)(R12*4), R8
MOVL 524288(DI)(R13*4), R10
MOVL AX, (DI)(R12*4)
MOVL AX, 524288(DI)(R13*4)
LEAL -2162685(AX), R12
CMPL R8, R12
JLE offset_ok_0_encodeBetterBlockAsm
MOVQ (DX)(R8*1), BX
CMPQ BX, R9
JEQ candidate_match_encodeBetterBlockAsm
offset_ok_0_encodeBetterBlockAsm:
CMPL R10, R12
JLE offset_ok_1_encodeBetterBlockAsm
MOVQ (DX)(R10*1), SI
CMPQ SI, R9
offset_ok_1_encodeBetterBlockAsm:
MOVL AX, R13
SUBL 16(SP), R13
MOVQ (DX)(R13*1), R13
MOVQ $0x000000ffffffff00, R14
XORQ R9, R13
TESTQ R14, R13
JNE no_repeat_found_encodeBetterBlockAsm
LEAL 1(AX), DI
MOVL 12(SP), R8
MOVL DI, R9
SUBL 16(SP), R9
JZ repeat_extend_back_end_encodeBetterBlockAsm
repeat_extend_back_loop_encodeBetterBlockAsm:
CMPL DI, R8
JBE repeat_extend_back_end_encodeBetterBlockAsm
MOVB -1(DX)(R9*1), R10
MOVB -1(DX)(DI*1), R11
CMPB R10, R11
JNE repeat_extend_back_end_encodeBetterBlockAsm
LEAL -1(DI), DI
DECL R9
JNZ repeat_extend_back_loop_encodeBetterBlockAsm
repeat_extend_back_end_encodeBetterBlockAsm:
MOVL DI, R8
SUBL 12(SP), R8
LEAQ 4(CX)(R8*1), R8
CMPQ R8, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm:
// emitLiteralsDstP
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), R10
SUBL R8, R9
// emitLiteral
LEAL -1(R9), R8
CMPL R8, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm
SUBL $0x1d, R8
CMPL R8, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm
CMPL R8, $0x00010000
JB three_bytes_repeat_emit_encodeBetterBlockAsm
MOVL R8, R11
SHRL $0x10, R11
MOVB $0xf8, (CX)
MOVW R8, 1(CX)
MOVB R11, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R8
JMP memmove_long_repeat_emit_encodeBetterBlockAsm
three_bytes_repeat_emit_encodeBetterBlockAsm:
MOVB $0xf0, (CX)
MOVW R8, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R8
JMP memmove_long_repeat_emit_encodeBetterBlockAsm
two_bytes_repeat_emit_encodeBetterBlockAsm:
MOVB $0xe8, (CX)
MOVB R8, 1(CX)
ADDL $0x1d, R8
ADDQ $0x02, CX
CMPL R8, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm
JMP memmove_long_repeat_emit_encodeBetterBlockAsm
one_byte_repeat_emit_encodeBetterBlockAsm:
SHLB $0x03, R8
MOVB R8, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R9*1), R8
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_8through16:
MOVOU (R10), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm:
MOVQ R8, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm
memmove_midrepeat_emit_encodeBetterBlockAsm:
LEAQ (CX)(R9*1), R8
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R9, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R9*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm:
MOVQ R8, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm
memmove_long_repeat_emit_encodeBetterBlockAsm:
LEAQ (CX)(R9*1), R8
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ CX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
MOVQ R8, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm:
ADDL $0x05, AX
MOVL AX, R8
SUBL 16(SP), R8
MOVQ src_len+32(FP), R9
SUBL AX, R9
LEAQ (DX)(AX*1), R10
LEAQ (DX)(R8*1), R8
// matchLen
XORL R12, R12
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm:
MOVQ (R10)(R12*1), R11
MOVQ 8(R10)(R12*1), R13
XORQ (R8)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm
XORQ 8(R8)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm
LEAL -16(R9), R9
LEAL 16(R12), R12
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm:
CMPL R9, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R13, R13
#else
BSFQ R13, R13
#endif
SARQ $0x03, R13
LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBetterBlockAsm
matchlen_match8_repeat_extend_encodeBetterBlockAsm:
CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm
MOVQ (R10)(R12*1), R11
XORQ (R8)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm
LEAL -8(R9), R9
LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBetterBlockAsm
matchlen_match4_repeat_extend_encodeBetterBlockAsm:
CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm
MOVL (R10)(R12*1), R11
CMPL (R8)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm
LEAL -4(R9), R9
LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBetterBlockAsm:
CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm
JB repeat_extend_forward_end_encodeBetterBlockAsm
MOVW (R10)(R12*1), R11
CMPW (R8)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm
LEAL 2(R12), R12
SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBetterBlockAsm
matchlen_match1_repeat_extend_encodeBetterBlockAsm:
MOVB (R10)(R12*1), R11
CMPB (R8)(R12*1), R11
JNE repeat_extend_forward_end_encodeBetterBlockAsm
LEAL 1(R12), R12
repeat_extend_forward_end_encodeBetterBlockAsm:
ADDL R12, AX
MOVL AX, R8
SUBL DI, R8
MOVL 16(SP), DI
// emitRepeat
LEAL -1(R8), DI
CMPL R8, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm
LEAL -30(R8), DI
CMPL R8, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm
CMPL R8, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm
MOVB $0xfc, (CX)
MOVL DI, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm
repeat_three_match_repeat_encodeBetterBlockAsm:
MOVB $0xf4, (CX)
MOVW DI, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm
repeat_two_match_repeat_encodeBetterBlockAsm:
MOVB $0xec, (CX)
MOVB DI, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm
repeat_one_match_repeat_encodeBetterBlockAsm:
XORL DI, DI
LEAL -4(DI)(R8*8), DI
MOVB DI, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm
no_repeat_found_encodeBetterBlockAsm:
CMPL R8, R12
JLE offset_ok_2_encodeBetterBlockAsm
CMPL BX, R9
JEQ candidate_match_encodeBetterBlockAsm
offset_ok_2_encodeBetterBlockAsm:
CMPL R10, R12
JLE offset_ok_3_encodeBetterBlockAsm
CMPL SI, R9
JEQ candidateS_match_encodeBetterBlockAsm
offset_ok_3_encodeBetterBlockAsm:
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm
candidateS_match_encodeBetterBlockAsm:
SHRQ $0x08, R9
MOVQ R9, R13
SHLQ $0x08, R13
IMULQ R11, R13
SHRQ $0x2f, R13
MOVL (DI)(R13*4), R8
INCL AX
MOVL AX, (DI)(R13*4)
CMPL R8, R12
JLE offset_ok_4_encodeBetterBlockAsm
CMPL (DX)(R8*1), R9
JEQ candidate_match_encodeBetterBlockAsm
offset_ok_4_encodeBetterBlockAsm:
DECL AX
MOVL R10, R8
candidate_match_encodeBetterBlockAsm:
MOVL 12(SP), DI
TESTL R8, R8
JZ match_extend_back_end_encodeBetterBlockAsm
match_extend_back_loop_encodeBetterBlockAsm:
CMPL AX, DI
JBE match_extend_back_end_encodeBetterBlockAsm
MOVB -1(DX)(R8*1), R9
MOVB -1(DX)(AX*1), R10
CMPB R9, R10
JNE match_extend_back_end_encodeBetterBlockAsm
LEAL -1(AX), AX
DECL R8
JZ match_extend_back_end_encodeBetterBlockAsm
JMP match_extend_back_loop_encodeBetterBlockAsm
match_extend_back_end_encodeBetterBlockAsm:
MOVL AX, DI
SUBL 12(SP), DI
LEAQ 4(CX)(DI*1), DI
CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm:
MOVL AX, DI
ADDL $0x04, AX
ADDL $0x04, R8
MOVQ src_len+32(FP), R9
SUBL AX, R9
LEAQ (DX)(AX*1), R10
LEAQ (DX)(R8*1), R11
// matchLen
XORL R13, R13
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
MOVQ (R10)(R13*1), R12
MOVQ 8(R10)(R13*1), R14
XORQ (R11)(R13*1), R12
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
XORQ 8(R11)(R13*1), R14
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm
LEAL -16(R9), R9
LEAL 16(R13), R13
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm:
CMPL R9, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm
matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R14, R14
#else
BSFQ R14, R14
#endif
SARQ $0x03, R14
LEAL 8(R13)(R14*1), R13
JMP match_nolit_end_encodeBetterBlockAsm
matchlen_match8_match_nolit_encodeBetterBlockAsm:
CMPL R9, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm
MOVQ (R10)(R13*1), R12
XORQ (R11)(R13*1), R12
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
LEAL -8(R9), R9
LEAL 8(R13), R13
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL (R13)(R12*1), R13
JMP match_nolit_end_encodeBetterBlockAsm
matchlen_match4_match_nolit_encodeBetterBlockAsm:
CMPL R9, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm
MOVL (R10)(R13*1), R12
CMPL (R11)(R13*1), R12
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm
LEAL -4(R9), R9
LEAL 4(R13), R13
matchlen_match2_match_nolit_encodeBetterBlockAsm:
CMPL R9, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm
JB match_nolit_end_encodeBetterBlockAsm
MOVW (R10)(R13*1), R12
CMPW (R11)(R13*1), R12
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm
LEAL 2(R13), R13
SUBL $0x02, R9
JZ match_nolit_end_encodeBetterBlockAsm
matchlen_match1_match_nolit_encodeBetterBlockAsm:
MOVB (R10)(R13*1), R12
CMPB (R11)(R13*1), R12
JNE match_nolit_end_encodeBetterBlockAsm
LEAL 1(R13), R13
match_nolit_end_encodeBetterBlockAsm:
MOVL AX, R9
SUBL R8, R9
CMPL R13, $0x01
JA match_length_ok_encodeBetterBlockAsm
CMPL R9, $0x0001003f
JBE match_length_ok_encodeBetterBlockAsm
MOVL 20(SP), AX
INCL AX
JMP search_loop_encodeBetterBlockAsm
match_length_ok_encodeBetterBlockAsm:
MOVL R9, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R10
MOVL DI, R8
SUBL R10, R8
JZ match_emit_nolits_encodeBetterBlockAsm
CMPL R9, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm
CMPL R9, $0x0001003f
JA match_emit_copy3_encodeBetterBlockAsm
CMPL R8, $0x04
JA match_emit_lits_encodeBetterBlockAsm
MOVL (DX)(R10*1), R10
ADDL R13, AX
ADDL $0x04, R13
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R11, R11
SUBL $0x40, R9
LEAL -11(R13), R12
LEAL -4(R13), R13
MOVW R9, 1(CX)
CMPL R13, $0x07
CMOVLGE R12, R11
MOVQ $0x00000007, R9
CMOVLLT R13, R9
LEAL -1(R8)(R9*4), R9
MOVL $0x00000003, R12
LEAL (R12)(R9*8), R9
MOVB R9, (CX)
ADDQ $0x03, CX
MOVL R10, (CX)
ADDQ R8, CX
TESTL R11, R11
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm
// emitRepeat
LEAL -1(R11), R8
CMPL R11, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm
LEAL -30(R11), R8
CMPL R11, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm
CMPL R11, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm
MOVB $0xfc, (CX)
MOVL R8, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm:
MOVB $0xf4, (CX)
MOVW R8, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm:
MOVB $0xec, (CX)
MOVB R8, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm:
XORL R8, R8
LEAL -4(R8)(R11*8), R8
MOVB R8, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
match_emit_copy3_encodeBetterBlockAsm:
CMPL R8, $0x03
JA match_emit_lits_encodeBetterBlockAsm
MOVLQZX 12(SP), R10
MOVL (DX)(R10*1), R10
ADDL R13, AX
ADDL $0x04, R13
MOVL AX, 12(SP)
// emitCopy3
LEAL -4(R13), R13
LEAL -65536(R9), R9
SHLL $0x0b, R9
LEAL 7(R9)(R8*8), R9
CMPL R13, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBetterBlockAsm
LEAL -60(R13), R11
CMPL R13, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBetterBlockAsm
CMPL R13, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBetterBlockAsm
ADDL $0x000007e0, R9
MOVL R9, (CX)
MOVL R11, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBetterBlockAsm
emit_copy3_2_match_emit_lits_encodeBetterBlockAsm:
ADDL $0x000007c0, R9
MOVL R9, (CX)
MOVW R11, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBetterBlockAsm
emit_copy3_1_match_emit_lits_encodeBetterBlockAsm:
ADDL $0x000007a0, R9
MOVL R9, (CX)
MOVB R11, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBetterBlockAsm
emit_copy3_0_match_emit_lits_encodeBetterBlockAsm:
SHLL $0x05, R13
ORL R13, R9
MOVL R9, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBetterBlockAsm:
MOVL R10, (CX)
ADDQ R8, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
match_emit_lits_encodeBetterBlockAsm:
LEAQ (DX)(R10*1), R10
// emitLiteral
LEAL -1(R8), R11
CMPL R11, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm
SUBL $0x1d, R11
CMPL R11, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm
CMPL R11, $0x00010000
JB three_bytes_match_emit_encodeBetterBlockAsm
MOVL R11, R12
SHRL $0x10, R12
MOVB $0xf8, (CX)
MOVW R11, 1(CX)
MOVB R12, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R11
JMP memmove_long_match_emit_encodeBetterBlockAsm
three_bytes_match_emit_encodeBetterBlockAsm:
MOVB $0xf0, (CX)
MOVW R11, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R11
JMP memmove_long_match_emit_encodeBetterBlockAsm
two_bytes_match_emit_encodeBetterBlockAsm:
MOVB $0xe8, (CX)
MOVB R11, 1(CX)
ADDL $0x1d, R11
ADDQ $0x02, CX
CMPL R11, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm
JMP memmove_long_match_emit_encodeBetterBlockAsm
one_byte_match_emit_encodeBetterBlockAsm:
SHLB $0x03, R11
MOVB R11, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R8*1), R11
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
MOVOU (R10), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R8*1), X2
MOVOU -16(R10)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm:
MOVQ R11, CX
JMP match_emit_nolits_encodeBetterBlockAsm
memmove_midmatch_emit_encodeBetterBlockAsm:
LEAQ (CX)(R8*1), R11
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R8, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R8*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R8*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R8*1), X2
MOVOU -16(R10)(R8*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm:
MOVQ R11, CX
JMP match_emit_nolits_encodeBetterBlockAsm
memmove_long_match_emit_encodeBetterBlockAsm:
LEAQ (CX)(R8*1), R11
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R8*1), X2
MOVOU -16(R10)(R8*1), X3
MOVQ R8, R14
SHRQ $0x05, R14
MOVQ CX, R12
ANDL $0x0000001f, R12
MOVQ $0x00000040, R15
SUBQ R12, R15
DECQ R14
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R15*1), R12
LEAQ -32(CX)(R15*1), BP
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (R12), X4
MOVOU 16(R12), X5
MOVOA X4, (BP)
MOVOA X5, 16(BP)
ADDQ $0x20, BP
ADDQ $0x20, R12
ADDQ $0x20, R15
DECQ R14
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R15*1), X4
MOVOU -16(R10)(R15*1), X5
MOVOA X4, -32(CX)(R15*1)
MOVOA X5, -16(CX)(R15*1)
ADDQ $0x20, R15
CMPQ R8, R15
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R8*1)
MOVOU X3, -16(CX)(R8*1)
MOVQ R11, CX
match_emit_nolits_encodeBetterBlockAsm:
ADDL R13, AX
ADDL $0x04, R13
MOVL AX, 12(SP)
// emitCopy
CMPL R9, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBetterBlockAsm
// emitCopy3
LEAL -4(R13), R13
LEAL -65536(R9), R8
SHLL $0x0b, R8
ADDL $0x07, R8
CMPL R13, $0x3c
JBE emit_copy3_0_match_nolit_encodeBetterBlockAsm_emit3
LEAL -60(R13), R9
CMPL R13, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBetterBlockAsm_emit3
CMPL R13, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBetterBlockAsm_emit3
ADDL $0x000007e0, R8
MOVL R8, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy3_2_match_nolit_encodeBetterBlockAsm_emit3:
ADDL $0x000007c0, R8
MOVL R8, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy3_1_match_nolit_encodeBetterBlockAsm_emit3:
ADDL $0x000007a0, R8
MOVL R8, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy3_0_match_nolit_encodeBetterBlockAsm_emit3:
SHLL $0x05, R13
ORL R13, R8
MOVL R8, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
two_byte_offset_match_nolit_encodeBetterBlockAsm:
CMPL R9, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm
CMPL R13, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm
LEAL -1(R9), R8
SHLL $0x06, R8
LEAL -15(R8)(R13*4), R8
MOVW R8, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_one_longer_match_nolit_encodeBetterBlockAsm:
CMPL R13, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm
LEAL -1(R9), R8
SHLL $0x06, R8
LEAL 61(R8), R8
MOVW R8, (CX)
LEAL -18(R13), R8
MOVB R8, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm:
LEAL -1(R9), R8
SHLL $0x06, R8
LEAL 57(R8), R8
MOVW R8, (CX)
ADDQ $0x02, CX
SUBL $0x12, R13
// emitRepeat
LEAL -1(R13), R8
CMPL R13, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm
LEAL -30(R13), R8
CMPL R13, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm
CMPL R13, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm
MOVB $0xfc, (CX)
MOVL R8, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm:
MOVB $0xf4, (CX)
MOVW R8, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm:
MOVB $0xec, (CX)
MOVB R8, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm:
XORL R8, R8
LEAL -4(R8)(R13*8), R8
MOVB R8, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
two_byte_match_nolit_encodeBetterBlockAsm:
// emitCopy2
LEAL -64(R9), R9
LEAL -4(R13), R13
MOVW R9, 1(CX)
CMPL R13, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm_emit2
LEAL -60(R13), R8
CMPL R13, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm_emit2
CMPL R13, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm_emit2
MOVB $0xfe, (CX)
MOVL R8, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy2_2_match_nolit_encodeBetterBlockAsm_emit2:
MOVB $0xfa, (CX)
MOVW R8, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy2_1_match_nolit_encodeBetterBlockAsm_emit2:
MOVB $0xf6, (CX)
MOVB R8, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy2_0_match_nolit_encodeBetterBlockAsm_emit2:
MOVL $0x00000002, R8
LEAL (R8)(R13*4), R8
MOVB R8, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
// emitLiteralsDstP
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), R10
SUBL R8, R9
// emitLiteral
LEAL -1(R9), R8
CMPL R8, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm
SUBL $0x1d, R8
CMPL R8, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm
CMPL R8, $0x00010000
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm
MOVL R8, R11
SHRL $0x10, R11
MOVB $0xf8, (CX)
MOVW R8, 1(CX)
MOVB R11, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R8
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
three_bytes_match_emit_repeat_encodeBetterBlockAsm:
MOVB $0xf0, (CX)
MOVW R8, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R8
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
two_bytes_match_emit_repeat_encodeBetterBlockAsm:
MOVB $0xe8, (CX)
MOVB R8, 1(CX)
ADDL $0x1d, R8
ADDQ $0x02, CX
CMPL R8, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
one_byte_match_emit_repeat_encodeBetterBlockAsm:
SHLB $0x03, R8
MOVB R8, (CX)
ADDQ $0x01, CX
LEAQ (CX)(R9*1), R8
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
MOVOU (R10), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
MOVQ R8, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
memmove_midmatch_emit_repeat_encodeBetterBlockAsm:
LEAQ (CX)(R9*1), R8
// genMemMoveShort
// margin: 15, min move: 30
CMPQ R9, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(R9*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm:
MOVQ R8, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
memmove_long_match_emit_repeat_encodeBetterBlockAsm:
LEAQ (CX)(R9*1), R8
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ CX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R12
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R12
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(CX)(R14*1)
MOVOA X5, -16(CX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(R9*1)
MOVOU X3, -16(CX)(R9*1)
MOVQ R8, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
ADDL R13, AX
ADDL $0x04, R13
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R13), R8
CMPL R13, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm
LEAL -30(R13), R8
CMPL R13, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm
CMPL R13, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm
MOVB $0xfc, (CX)
MOVL R8, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
MOVB $0xf4, (CX)
MOVW R8, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
MOVB $0xec, (CX)
MOVB R8, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_one_match_nolit_repeat_encodeBetterBlockAsm:
XORL R8, R8
LEAL -4(R8)(R13*8), R8
MOVB R8, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm:
MOVQ tmp+48(FP), R8
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, R10
LEAQ 1(DI), DI
LEAQ -2(AX), R11
MOVQ (DX)(DI*1), R12
MOVQ 1(DX)(DI*1), R13
MOVQ (DX)(R11*1), R14
MOVQ 1(DX)(R11*1), R15
SHLQ $0x08, R12
IMULQ R9, R12
SHRQ $0x2f, R12
SHLQ $0x20, R13
IMULQ R10, R13
SHRQ $0x32, R13
SHLQ $0x08, R14
IMULQ R9, R14
SHRQ $0x2f, R14
SHLQ $0x20, R15
IMULQ R10, R15
SHRQ $0x32, R15
LEAQ 1(DI), R10
LEAQ 1(R11), BP
MOVL DI, (R8)(R12*4)
MOVL R11, (R8)(R14*4)
LEAQ 1(R11)(DI*1), R12
SHRQ $0x01, R12
ADDQ $0x01, DI
SUBQ $0x01, R11
MOVL R10, 524288(R8)(R13*4)
MOVL BP, 524288(R8)(R15*4)
index_loop_encodeBetterBlockAsm:
CMPQ R12, R11
JAE search_loop_encodeBetterBlockAsm
MOVQ (DX)(DI*1), R10
MOVQ (DX)(R12*1), R13
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x2f, R10
SHLQ $0x08, R13
IMULQ R9, R13
SHRQ $0x2f, R13
MOVL DI, (R8)(R10*4)
MOVL R11, (R8)(R13*4)
ADDQ $0x02, DI
ADDQ $0x02, R12
JMP index_loop_encodeBetterBlockAsm
emit_remainder_encodeBetterBlockAsm:
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 4(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm
CMPL DX, $0x00010000
JB three_bytes_emit_remainder_encodeBetterBlockAsm
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB BL, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
three_bytes_emit_remainder_encodeBetterBlockAsm:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
two_bytes_emit_remainder_encodeBetterBlockAsm:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
one_byte_emit_remainder_encodeBetterBlockAsm:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
memmove_midemit_remainder_encodeBetterBlockAsm:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
memmove_long_emit_remainder_encodeBetterBlockAsm:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm2MB(dst []byte, src []byte, tmp *[589824]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBetterBlockAsm2MB(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00001200, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm2MB:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm2MB
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
LEAQ -17(AX), DX
LEAQ -17(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm2MB:
MOVQ tmp+48(FP), BX
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
CMPL SI, $0x63
JBE check_maxskip_ok_encodeBetterBlockAsm2MB
LEAL 100(AX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm2MB
check_maxskip_ok_encodeBetterBlockAsm2MB:
LEAL 1(AX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm2MB:
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm2MB
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL (BX)(R10*4), SI
MOVL 524288(BX)(R11*4), R8
MOVL AX, (BX)(R10*4)
MOVL AX, 524288(BX)(R11*4)
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm2MB
MOVQ (DX)(R8*1), R11
CMPQ R11, DI
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm2MB
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm2MB
repeat_extend_back_loop_encodeBetterBlockAsm2MB:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm2MB
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm2MB
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm2MB
repeat_extend_back_end_encodeBetterBlockAsm2MB:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 4(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm2MB
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm2MB:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm2MB
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm2MB
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm2MB
CMPL SI, $0x00010000
JB three_bytes_repeat_emit_encodeBetterBlockAsm2MB
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm2MB
three_bytes_repeat_emit_encodeBetterBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm2MB
two_bytes_repeat_emit_encodeBetterBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm2MB
JMP memmove_long_repeat_emit_encodeBetterBlockAsm2MB
one_byte_repeat_emit_encodeBetterBlockAsm2MB:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 16, min move: 1
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm2MB:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm2MB
memmove_midrepeat_emit_encodeBetterBlockAsm2MB:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 15, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm2MB:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm2MB
memmove_long_repeat_emit_encodeBetterBlockAsm2MB:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm2MBlarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm2MB:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm2MB
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm2MB:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm2MB
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm2MB
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm2MB:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm2MB
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm2MB
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm2MB
matchlen_match8_repeat_extend_encodeBetterBlockAsm2MB:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm2MB
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm2MB
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm2MB
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm2MB
matchlen_match4_repeat_extend_encodeBetterBlockAsm2MB:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm2MB
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm2MB
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm2MB:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm2MB
JB repeat_extend_forward_end_encodeBetterBlockAsm2MB
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm2MB
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm2MB
matchlen_match1_repeat_extend_encodeBetterBlockAsm2MB:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm2MB
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm2MB:
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm2MB
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm2MB
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm2MB
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm2MB
repeat_three_match_repeat_encodeBetterBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm2MB
repeat_two_match_repeat_encodeBetterBlockAsm2MB:
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm2MB
repeat_one_match_repeat_encodeBetterBlockAsm2MB:
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm2MB:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm2MB
no_repeat_found_encodeBetterBlockAsm2MB:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm2MB
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm2MB
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm2MB
candidateS_match_encodeBetterBlockAsm2MB:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x2f, R10
MOVL (BX)(R10*4), SI
INCL AX
MOVL AX, (BX)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm2MB
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm2MB:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm2MB
match_extend_back_loop_encodeBetterBlockAsm2MB:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm2MB
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm2MB
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm2MB
JMP match_extend_back_loop_encodeBetterBlockAsm2MB
match_extend_back_end_encodeBetterBlockAsm2MB:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 4(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm2MB
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm2MB:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm2MB
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm2MB:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm2MB
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm2MB
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm2MB:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm2MB
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm2MB
matchlen_bsf_16match_nolit_encodeBetterBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm2MB
matchlen_match8_match_nolit_encodeBetterBlockAsm2MB:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm2MB
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm2MB
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm2MB
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm2MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm2MB
matchlen_match4_match_nolit_encodeBetterBlockAsm2MB:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm2MB
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm2MB
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm2MB:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm2MB
JB match_nolit_end_encodeBetterBlockAsm2MB
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm2MB
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm2MB
matchlen_match1_match_nolit_encodeBetterBlockAsm2MB:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm2MB
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm2MB:
MOVL AX, DI
SUBL SI, DI
CMPL R11, $0x01
JA match_length_ok_encodeBetterBlockAsm2MB
CMPL DI, $0x0001003f
JBE match_length_ok_encodeBetterBlockAsm2MB
MOVL 20(SP), AX
INCL AX
JMP search_loop_encodeBetterBlockAsm2MB
match_length_ok_encodeBetterBlockAsm2MB:
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm2MB
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm2MB
CMPL DI, $0x0001003f
JA match_emit_copy3_encodeBetterBlockAsm2MB
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm2MB
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm2MB
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm2MB
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm2MB
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm2MB:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm2MB:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
match_emit_copy3_encodeBetterBlockAsm2MB:
CMPL SI, $0x03
JA match_emit_lits_encodeBetterBlockAsm2MB
MOVLQZX 12(SP), R8
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(DI), DI
SHLL $0x0b, DI
LEAL 7(DI)(SI*8), DI
CMPL R11, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBetterBlockAsm2MB
LEAL -60(R11), R9
CMPL R11, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBetterBlockAsm2MB
CMPL R11, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBetterBlockAsm2MB
ADDL $0x000007e0, DI
MOVL DI, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBetterBlockAsm2MB
emit_copy3_2_match_emit_lits_encodeBetterBlockAsm2MB:
ADDL $0x000007c0, DI
MOVL DI, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBetterBlockAsm2MB
emit_copy3_1_match_emit_lits_encodeBetterBlockAsm2MB:
ADDL $0x000007a0, DI
MOVL DI, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBetterBlockAsm2MB
emit_copy3_0_match_emit_lits_encodeBetterBlockAsm2MB:
SHLL $0x05, R11
ORL R11, DI
MOVL DI, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBetterBlockAsm2MB:
MOVL R8, (CX)
ADDQ SI, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
match_emit_lits_encodeBetterBlockAsm2MB:
LEAQ (DX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm2MB
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm2MB
CMPL R9, $0x00010000
JB three_bytes_match_emit_encodeBetterBlockAsm2MB
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm2MB
three_bytes_match_emit_encodeBetterBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm2MB
two_bytes_match_emit_encodeBetterBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm2MB
JMP memmove_long_match_emit_encodeBetterBlockAsm2MB
one_byte_match_emit_encodeBetterBlockAsm2MB:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 16, min move: 1
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm2MB:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm2MB
memmove_midmatch_emit_encodeBetterBlockAsm2MB:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 15, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm2MB
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm2MB:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm2MB
memmove_long_match_emit_encodeBetterBlockAsm2MB:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm2MBlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm2MB:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBetterBlockAsm2MB
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(DI), SI
SHLL $0x0b, SI
ADDL $0x07, SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_nolit_encodeBetterBlockAsm2MB_emit3
LEAL -60(R11), DI
CMPL R11, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBetterBlockAsm2MB_emit3
CMPL R11, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBetterBlockAsm2MB_emit3
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL DI, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy3_2_match_nolit_encodeBetterBlockAsm2MB_emit3:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW DI, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy3_1_match_nolit_encodeBetterBlockAsm2MB_emit3:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB DI, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy3_0_match_nolit_encodeBetterBlockAsm2MB_emit3:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
two_byte_offset_match_nolit_encodeBetterBlockAsm2MB:
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm2MB
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm2MB
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_one_longer_match_nolit_encodeBetterBlockAsm2MB:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm2MB
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm2MB:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm2MB:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
two_byte_match_nolit_encodeBetterBlockAsm2MB:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm2MB_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm2MB_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm2MB_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy2_2_match_nolit_encodeBetterBlockAsm2MB_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy2_1_match_nolit_encodeBetterBlockAsm2MB_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
emit_copy2_0_match_nolit_encodeBetterBlockAsm2MB_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
// emitLiteralsDstP
// NOTE(review): no label precedes this sequence and the instruction before
// it is an unconditional JMP, so it cannot be reached by fall-through. No
// jump into it is visible in this part of the file - confirm whether the
// generator emitted it as dead code.
// SI = 12(SP), the src offset up to which input has already been emitted.
// Nothing is pending when it equals BX.
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm2MB
// DI = BX - SI, the number of pending literal bytes; 12(SP) advances to BX.
MOVL BX, DI
MOVL BX, 12(SP)
// R8 = address of the first pending literal byte (DX is the src base).
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm2MB
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm2MB
CMPL SI, $0x00010000
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm2MB
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm2MB
three_bytes_match_emit_repeat_encodeBetterBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm2MB
two_bytes_match_emit_repeat_encodeBetterBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm2MB
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm2MB
one_byte_match_emit_repeat_encodeBetterBlockAsm2MB:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 16, min move: 1
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm2MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm2MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm2MB:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm2MB
memmove_midmatch_emit_repeat_encodeBetterBlockAsm2MB:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 15, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm2MB
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm2MB:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm2MB
memmove_long_match_emit_repeat_encodeBetterBlockAsm2MB:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm2MBlarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm2MB:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm2MB
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm2MB
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm2MB
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_three_match_nolit_repeat_encodeBetterBlockAsm2MB:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_two_match_nolit_repeat_encodeBetterBlockAsm2MB:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm2MB
repeat_one_match_nolit_repeat_encodeBetterBlockAsm2MB:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm2MB:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm2MB
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm2MB
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm2MB:
// Re-seed the match tables in tmp with positions covered by the match that
// was just emitted. DX is the src base and AX the current src offset. BX is
// used as the lower src offset (presumably the match start, set earlier in
// the function - TODO confirm).
// SI = table base, DI = multiplier for the 7-byte hash, R8 = multiplier for
// the 4-byte hash.
MOVQ tmp+48(FP), SI
MOVQ $0x00cf1bbcdcbfa563, DI
MOVQ $0x9e3779b1, R8
// index0 = BX+1, index1 = AX-2.
LEAQ 1(BX), BX
LEAQ -2(AX), R9
// Load the 8-byte words at index0, index0+1, index1 and index1+1.
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
// Hash of the low 7 bytes at index0 -> 17-bit slot in the table at tmp+0.
SHLQ $0x08, R10
IMULQ DI, R10
SHRQ $0x2f, R10
// Hash of the low 4 bytes at index0+1 -> 14-bit slot in the table at
// tmp+524288.
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
// Same two hashes for index1 and index1+1.
SHLQ $0x08, R12
IMULQ DI, R12
SHRQ $0x2f, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x32, R13
// R8 (multiplier no longer needed) = index0+1, R14 = index1+1: the offsets
// that belong to the two 4-byte hashes.
LEAQ 1(BX), R8
LEAQ 1(R9), R14
// Each slot receives the offset whose bytes produced its hash.
MOVL BX, (SI)(R10*4)
MOVL R9, (SI)(R12*4)
// R10 = (index0 + index1 + 1) / 2, the start of the second sparse stream.
// BX then steps to index0+1 and R9 becomes index1-1, the loop bound.
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVL R8, 524288(SI)(R11*4)
MOVL R14, 524288(SI)(R13*4)
index_loop_encodeBetterBlockAsm2MB:
// Index every second position with the 7-byte hash, using two streams: one
// starting at BX and one starting at the midpoint R10. Stop when the
// midpoint stream reaches the bound R9.
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm2MB
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x08, R8
IMULQ DI, R8
SHRQ $0x2f, R8
SHLQ $0x08, R11
IMULQ DI, R11
SHRQ $0x2f, R11
MOVL BX, (SI)(R8*4)
// R11 is the hash of the word at offset R10, so R10 is the offset to record.
MOVL R10, (SI)(R11*4)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm2MB
emit_remainder_encodeBetterBlockAsm2MB:
// AX = src_len - 12(SP): the number of src bytes that have not been emitted.
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
// AX = CX + remaining + 4, where CX is the dst write pointer. The literal
// header written below takes at most 4 bytes.
LEAQ 4(CX)(AX*1), AX
// (SP) holds a dst limit pointer (initialised outside this view). Return 0
// when the remaining literals would reach it.
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm2MB
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm2MB:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm2MB
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm2MB
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm2MB
CMPL DX, $0x00010000
JB three_bytes_emit_remainder_encodeBetterBlockAsm2MB
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB BL, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm2MB
three_bytes_emit_remainder_encodeBetterBlockAsm2MB:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm2MB
two_bytes_emit_remainder_encodeBetterBlockAsm2MB:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm2MB
JMP memmove_long_emit_remainder_encodeBetterBlockAsm2MB
one_byte_emit_remainder_encodeBetterBlockAsm2MB:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm2MB:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm2MB
memmove_midemit_remainder_encodeBetterBlockAsm2MB:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm2MB
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm2MB_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm2MB:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm2MB
memmove_long_emit_remainder_encodeBetterBlockAsm2MB:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm2MBlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm2MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm2MBlarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm2MB:
// Return CX - dst_base: the distance the dst write pointer has advanced.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm512K(dst []byte, src []byte, tmp *[294912]byte) int
// Requires: BMI, CMOV, SSE2
//
// Stack locals, as used in the visible part of the body:
//   (SP)   dst limit pointer; output that would reach it makes the function return 0
//   8(SP)  src offset limit for the search loop (src_len - 8)
//   12(SP) src offset up to which input has been emitted
//   16(SP) current repeat offset (starts at 1)
//   20(SP) next search position computed by the skip heuristic
TEXT ·encodeBetterBlockAsm512K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Zero tmp: 0x900 iterations of 128 bytes each = 294912 bytes.
MOVQ $0x00000900, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm512K:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm512K
// Nothing emitted yet.
MOVL $0x00000000, 12(SP)
// 8(SP) = src_len - 8.
// (SP)  = dst_base + (src_len - 11 - src_len/32).
MOVQ src_len+32(FP), AX
LEAQ -11(AX), DX
LEAQ -8(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
// Start at src offset 1 with repeat offset 1; DX becomes the src base and
// CX remains the dst write pointer.
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm512K:
// BX = table base. AX is the current src offset, DX the src base.
MOVQ tmp+48(FP), BX
// Skip heuristic: SI = (AX - 12(SP)) >> 7, so the step grows with the
// distance from the last emitted position.
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
// Cap the step: the next position is AX+100 when SI > 99, else AX+1+SI.
CMPL SI, $0x63
JBE check_maxskip_ok_encodeBetterBlockAsm512K
LEAL 100(AX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm512K
check_maxskip_ok_encodeBetterBlockAsm512K:
LEAL 1(AX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm512K:
// Stop searching once the next position reaches the limit in 8(SP).
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm512K
// DI = 8 bytes at the current position; the next position is kept in 20(SP).
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
// R9 and SI hold the multipliers of the two hashes.
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = hash of the low 7 bytes, a 16-bit slot in the table at tmp+0.
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
// R11 = hash of the low 4 bytes, a 13-bit slot in the table at tmp+262144.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x33, R11
// Fetch both candidates (SI from the first table, R8 from the second), then
// record the current position in both slots.
MOVL (BX)(R10*4), SI
MOVL 262144(BX)(R11*4), R8
MOVL AX, (BX)(R10*4)
MOVL AX, 262144(BX)(R11*4)
// A full 8-byte match at the first-table candidate is taken immediately.
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm512K
MOVQ (DX)(R8*1), R11
// NOTE(review): no branch consumes the flags of this compare; the SUBL below
// overwrites them. The low 32 bits of R11 are compared again at
// no_repeat_found.
CMPQ R11, DI
// Repeat test: load the 8 bytes at AX - 16(SP) and compare bytes 1..4
// (mask 0x000000ffffffff00) with the same bytes of DI.
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm512K
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm512K
repeat_extend_back_loop_encodeBetterBlockAsm512K:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm512K
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm512K
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm512K
repeat_extend_back_end_encodeBetterBlockAsm512K:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 4(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm512K
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm512K:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm512K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm512K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm512K
CMPL SI, $0x00010000
JB three_bytes_repeat_emit_encodeBetterBlockAsm512K
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm512K
three_bytes_repeat_emit_encodeBetterBlockAsm512K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm512K
two_bytes_repeat_emit_encodeBetterBlockAsm512K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm512K
JMP memmove_long_repeat_emit_encodeBetterBlockAsm512K
one_byte_repeat_emit_encodeBetterBlockAsm512K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm512K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm512K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm512K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm512K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm512K
memmove_midrepeat_emit_encodeBetterBlockAsm512K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm512K
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm512K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm512K
memmove_long_repeat_emit_encodeBetterBlockAsm512K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm512Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm512K:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm512K
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm512K:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm512K
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm512K
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm512K:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm512K
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm512K
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm512K
matchlen_match8_repeat_extend_encodeBetterBlockAsm512K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm512K
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm512K
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm512K
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm512K
matchlen_match4_repeat_extend_encodeBetterBlockAsm512K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm512K
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm512K
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm512K:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm512K
JB repeat_extend_forward_end_encodeBetterBlockAsm512K
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm512K
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm512K
matchlen_match1_repeat_extend_encodeBetterBlockAsm512K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm512K
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm512K:
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm512K
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm512K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm512K
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm512K
repeat_three_match_repeat_encodeBetterBlockAsm512K:
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm512K
repeat_two_match_repeat_encodeBetterBlockAsm512K:
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm512K
repeat_one_match_repeat_encodeBetterBlockAsm512K:
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm512K:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm512K
no_repeat_found_encodeBetterBlockAsm512K:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm512K
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm512K
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm512K
candidateS_match_encodeBetterBlockAsm512K:
// Entered when the low 4 bytes at the second-table candidate (R8) equal
// those at the current position. Before accepting it, probe the first
// table for position AX+1.
// DI >>= 8 leaves the bytes that start at AX+1; hash their low 7 bytes with
// the multiplier still held in R9.
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
// SI = first-table candidate for AX+1; record AX+1 in that slot.
MOVL (BX)(R10*4), SI
INCL AX
MOVL AX, (BX)(R10*4)
// If that candidate matches 4 bytes, use it and keep AX advanced by one.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm512K
// Otherwise step AX back and fall through with the second-table candidate.
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm512K:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm512K
match_extend_back_loop_encodeBetterBlockAsm512K:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm512K
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm512K
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm512K
JMP match_extend_back_loop_encodeBetterBlockAsm512K
match_extend_back_end_encodeBetterBlockAsm512K:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 4(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm512K
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm512K:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm512K
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm512K:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm512K
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm512K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm512K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm512K
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm512K
matchlen_bsf_16match_nolit_encodeBetterBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm512K
matchlen_match8_match_nolit_encodeBetterBlockAsm512K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm512K
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm512K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm512K
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm512K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm512K
matchlen_match4_match_nolit_encodeBetterBlockAsm512K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm512K
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm512K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm512K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm512K
JB match_nolit_end_encodeBetterBlockAsm512K
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm512K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm512K
matchlen_match1_match_nolit_encodeBetterBlockAsm512K:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm512K
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm512K:
MOVL AX, DI
SUBL SI, DI
CMPL R11, $0x01
JA match_length_ok_encodeBetterBlockAsm512K
CMPL DI, $0x0001003f
JBE match_length_ok_encodeBetterBlockAsm512K
MOVL 20(SP), AX
INCL AX
JMP search_loop_encodeBetterBlockAsm512K
match_length_ok_encodeBetterBlockAsm512K:
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm512K
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm512K
CMPL DI, $0x0001003f
JA match_emit_copy3_encodeBetterBlockAsm512K
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm512K
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm512K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm512K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm512K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm512K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm512K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm512K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm512K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
match_emit_copy3_encodeBetterBlockAsm512K:
CMPL SI, $0x03
JA match_emit_lits_encodeBetterBlockAsm512K
MOVLQZX 12(SP), R8
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(DI), DI
SHLL $0x0b, DI
LEAL 7(DI)(SI*8), DI
CMPL R11, $0x3c
JBE emit_copy3_0_match_emit_lits_encodeBetterBlockAsm512K
LEAL -60(R11), R9
CMPL R11, $0x0000013c
JB emit_copy3_1_match_emit_lits_encodeBetterBlockAsm512K
CMPL R11, $0x0001003c
JB emit_copy3_2_match_emit_lits_encodeBetterBlockAsm512K
ADDL $0x000007e0, DI
MOVL DI, (CX)
MOVL R9, 4(CX)
ADDQ $0x07, CX
JMP match_emit_copy_litsencodeBetterBlockAsm512K
emit_copy3_2_match_emit_lits_encodeBetterBlockAsm512K:
ADDL $0x000007c0, DI
MOVL DI, (CX)
MOVW R9, 4(CX)
ADDQ $0x06, CX
JMP match_emit_copy_litsencodeBetterBlockAsm512K
emit_copy3_1_match_emit_lits_encodeBetterBlockAsm512K:
ADDL $0x000007a0, DI
MOVL DI, (CX)
MOVB R9, 4(CX)
ADDQ $0x05, CX
JMP match_emit_copy_litsencodeBetterBlockAsm512K
emit_copy3_0_match_emit_lits_encodeBetterBlockAsm512K:
SHLL $0x05, R11
ORL R11, DI
MOVL DI, (CX)
ADDQ $0x04, CX
match_emit_copy_litsencodeBetterBlockAsm512K:
MOVL R8, (CX)
ADDQ SI, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
match_emit_lits_encodeBetterBlockAsm512K:
LEAQ (DX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm512K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm512K
CMPL R9, $0x00010000
JB three_bytes_match_emit_encodeBetterBlockAsm512K
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm512K
three_bytes_match_emit_encodeBetterBlockAsm512K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm512K
two_bytes_match_emit_encodeBetterBlockAsm512K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm512K
JMP memmove_long_match_emit_encodeBetterBlockAsm512K
one_byte_match_emit_encodeBetterBlockAsm512K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 1
CMPQ SI, $0x08
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_8
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_8:
MOVQ (R8), R10
MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_8through16:
MOVQ (R8), R10
MOVQ -8(R8)(SI*1), R8
MOVQ R10, (CX)
MOVQ R8, -8(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm512K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm512K
memmove_midmatch_emit_encodeBetterBlockAsm512K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm512K
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm512K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm512K
memmove_long_match_emit_encodeBetterBlockAsm512K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm512Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm512K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x0001003f
JBE two_byte_offset_match_nolit_encodeBetterBlockAsm512K
// emitCopy3
LEAL -4(R11), R11
LEAL -65536(DI), SI
SHLL $0x0b, SI
ADDL $0x07, SI
CMPL R11, $0x3c
JBE emit_copy3_0_match_nolit_encodeBetterBlockAsm512K_emit3
LEAL -60(R11), DI
CMPL R11, $0x0000013c
JB emit_copy3_1_match_nolit_encodeBetterBlockAsm512K_emit3
CMPL R11, $0x0001003c
JB emit_copy3_2_match_nolit_encodeBetterBlockAsm512K_emit3
ADDL $0x000007e0, SI
MOVL SI, (CX)
MOVL DI, 4(CX)
ADDQ $0x07, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy3_2_match_nolit_encodeBetterBlockAsm512K_emit3:
ADDL $0x000007c0, SI
MOVL SI, (CX)
MOVW DI, 4(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy3_1_match_nolit_encodeBetterBlockAsm512K_emit3:
ADDL $0x000007a0, SI
MOVL SI, (CX)
MOVB DI, 4(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy3_0_match_nolit_encodeBetterBlockAsm512K_emit3:
SHLL $0x05, R11
ORL R11, SI
MOVL SI, (CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
two_byte_offset_match_nolit_encodeBetterBlockAsm512K:
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm512K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm512K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_one_longer_match_nolit_encodeBetterBlockAsm512K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm512K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm512K:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm512K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
two_byte_match_nolit_encodeBetterBlockAsm512K:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm512K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm512K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm512K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy2_2_match_nolit_encodeBetterBlockAsm512K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy2_1_match_nolit_encodeBetterBlockAsm512K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
emit_copy2_0_match_nolit_encodeBetterBlockAsm512K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm512K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm512K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm512K
CMPL SI, $0x00010000
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm512K
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm512K
three_bytes_match_emit_repeat_encodeBetterBlockAsm512K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm512K
two_bytes_match_emit_repeat_encodeBetterBlockAsm512K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm512K
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm512K
one_byte_match_emit_repeat_encodeBetterBlockAsm512K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm512K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm512K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm512K
memmove_midmatch_emit_repeat_encodeBetterBlockAsm512K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm512K
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm512K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm512K
memmove_long_match_emit_repeat_encodeBetterBlockAsm512K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm512Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm512K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm512K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm512K
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm512K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_three_match_nolit_repeat_encodeBetterBlockAsm512K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_two_match_nolit_repeat_encodeBetterBlockAsm512K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm512K
repeat_one_match_nolit_repeat_encodeBetterBlockAsm512K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm512K:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm512K
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm512K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm512K:
MOVQ tmp+48(FP), SI
MOVQ $0x00cf1bbcdcbfa563, DI
MOVQ $0x9e3779b1, R8
LEAQ 1(BX), BX
LEAQ -2(AX), R9
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ DI, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x33, R11
SHLQ $0x08, R12
IMULQ DI, R12
SHRQ $0x30, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x33, R13
LEAQ 1(BX), R8
LEAQ 1(R9), R14
MOVL BX, (SI)(R10*4)
MOVL R9, (SI)(R12*4)
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVL R8, 262144(SI)(R11*4)
MOVL R14, 262144(SI)(R13*4)
index_loop_encodeBetterBlockAsm512K:
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm512K
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x08, R8
IMULQ DI, R8
SHRQ $0x30, R8
SHLQ $0x08, R11
IMULQ DI, R11
SHRQ $0x30, R11
MOVL BX, (SI)(R8*4)
MOVL R9, (SI)(R11*4)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm512K
emit_remainder_encodeBetterBlockAsm512K:
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 4(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm512K
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm512K:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm512K
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm512K
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm512K
CMPL DX, $0x00010000
JB three_bytes_emit_remainder_encodeBetterBlockAsm512K
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB BL, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm512K
three_bytes_emit_remainder_encodeBetterBlockAsm512K:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm512K
two_bytes_emit_remainder_encodeBetterBlockAsm512K:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm512K
JMP memmove_long_emit_remainder_encodeBetterBlockAsm512K
one_byte_emit_remainder_encodeBetterBlockAsm512K:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm512K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm512K
memmove_midemit_remainder_encodeBetterBlockAsm512K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm512K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm512K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm512K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm512K
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm512K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm512K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm512K
memmove_long_emit_remainder_encodeBetterBlockAsm512K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm512Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm512Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm512Klarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm512Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm512K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm64K(dst []byte, src []byte, tmp *[73728]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBetterBlockAsm64K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00000240, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm64K:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm64K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
LEAQ -11(AX), DX
LEAQ -8(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm64K:
MOVQ tmp+48(FP), BX
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 1(AX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm64K
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x31, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x34, R11
MOVWLZX (BX)(R10*2), SI
MOVWLZX 65536(BX)(R11*2), R8
MOVW AX, (BX)(R10*2)
MOVW AX, 65536(BX)(R11*2)
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm64K
MOVQ (DX)(R8*1), R11
CMPQ R11, DI
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm64K
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm64K
repeat_extend_back_loop_encodeBetterBlockAsm64K:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm64K
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm64K
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm64K
repeat_extend_back_end_encodeBetterBlockAsm64K:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 4(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm64K
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm64K:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm64K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm64K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm64K
JB three_bytes_repeat_emit_encodeBetterBlockAsm64K
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm64K
three_bytes_repeat_emit_encodeBetterBlockAsm64K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm64K
two_bytes_repeat_emit_encodeBetterBlockAsm64K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm64K
JMP memmove_long_repeat_emit_encodeBetterBlockAsm64K
one_byte_repeat_emit_encodeBetterBlockAsm64K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm64K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm64K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm64K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm64K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm64K
memmove_midrepeat_emit_encodeBetterBlockAsm64K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm64K
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm64K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm64K
memmove_long_repeat_emit_encodeBetterBlockAsm64K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm64Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm64K:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm64K
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm64K:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm64K
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm64K
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm64K:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm64K
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm64K
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm64K
matchlen_match8_repeat_extend_encodeBetterBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm64K
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm64K
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm64K
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm64K
matchlen_match4_repeat_extend_encodeBetterBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm64K
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm64K
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm64K:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm64K
JB repeat_extend_forward_end_encodeBetterBlockAsm64K
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm64K
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm64K
matchlen_match1_repeat_extend_encodeBetterBlockAsm64K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm64K
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm64K:
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm64K
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm64K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm64K
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm64K
repeat_three_match_repeat_encodeBetterBlockAsm64K:
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm64K
repeat_two_match_repeat_encodeBetterBlockAsm64K:
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm64K
repeat_one_match_repeat_encodeBetterBlockAsm64K:
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm64K:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm64K
no_repeat_found_encodeBetterBlockAsm64K:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm64K
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm64K
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm64K
candidateS_match_encodeBetterBlockAsm64K:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x31, R10
MOVWLZX (BX)(R10*2), SI
INCL AX
MOVW AX, (BX)(R10*2)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm64K
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm64K:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm64K
match_extend_back_loop_encodeBetterBlockAsm64K:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm64K
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm64K
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm64K
JMP match_extend_back_loop_encodeBetterBlockAsm64K
match_extend_back_end_encodeBetterBlockAsm64K:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 4(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm64K
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm64K:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm64K
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm64K:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm64K
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm64K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm64K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm64K
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm64K
matchlen_bsf_16match_nolit_encodeBetterBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm64K
matchlen_match8_match_nolit_encodeBetterBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm64K
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm64K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm64K
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm64K
matchlen_match4_match_nolit_encodeBetterBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm64K
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm64K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm64K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm64K
JB match_nolit_end_encodeBetterBlockAsm64K
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm64K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm64K
matchlen_match1_match_nolit_encodeBetterBlockAsm64K:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm64K
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm64K:
MOVL AX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm64K
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm64K
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm64K
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm64K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm64K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm64K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm64K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm64K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm64K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm64K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
match_emit_lits_encodeBetterBlockAsm64K:
// Literals are pending in front of the match. On the visible paths into this
// label SI holds the literal count (non-zero) and R8 holds the zero-extended
// value of 12(SP). R8 becomes the address DX+R8 that the copy code reads from.
LEAQ (DX)(R8*1), R8
// emitLiteral
// R9 = count-1. A value below 29 is encoded in a single tag byte.
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm64K
// R9 = count-1-29. A value below 256 uses tag 0xe8 plus one length byte.
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm64K
// NOTE(review): this JB tests the same flags as the JB directly above (nothing
// in between modifies them), so it is never taken when reached. Every value
// >= 256 therefore falls through to the 4-byte header below, and no other jump
// to the three_bytes_ label is visible in this chunk. The 16K variant places
// its three_bytes_ label immediately after the same JB pair. Confirm against
// the generator whether a compare with $0x00010000 was intended here.
JB three_bytes_match_emit_encodeBetterBlockAsm64K
// 4-byte header: tag 0xf8, then the low 16 bits of R9, then bits 16..23 (R10).
MOVL R9, R10
SHRL $0x10, R10
MOVB $0xf8, (CX)
MOVW R9, 1(CX)
MOVB R10, 3(CX)
ADDQ $0x04, CX
// Undo the SUBL above so R9 is count-1 again.
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm64K
three_bytes_match_emit_encodeBetterBlockAsm64K:
// 3-byte header: tag 0xf0 followed by a 16-bit length value.
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm64K
two_bytes_match_emit_encodeBetterBlockAsm64K:
// 2-byte header: tag 0xe8 followed by an 8-bit length value.
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
// R9 is count-1 again; counts of at most 64 take the short SSE copy,
// larger ones the long copy loop.
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm64K
JMP memmove_long_match_emit_encodeBetterBlockAsm64K
one_byte_match_emit_encodeBetterBlockAsm64K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 1
CMPQ SI, $0x08
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_8
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_8:
MOVQ (R8), R10
MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_8through16:
MOVQ (R8), R10
MOVQ -8(R8)(SI*1), R8
MOVQ R10, (CX)
MOVQ R8, -8(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm64K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm64K
memmove_midmatch_emit_encodeBetterBlockAsm64K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm64K
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm64K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm64K
memmove_long_match_emit_encodeBetterBlockAsm64K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm64Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm64K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm64K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm64K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
emit_one_longer_match_nolit_encodeBetterBlockAsm64K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm64K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm64K:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm64K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
two_byte_match_nolit_encodeBetterBlockAsm64K:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm64K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm64K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm64K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
emit_copy2_2_match_nolit_encodeBetterBlockAsm64K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
emit_copy2_1_match_nolit_encodeBetterBlockAsm64K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
emit_copy2_0_match_nolit_encodeBetterBlockAsm64K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm64K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm64K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm64K
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm64K
MOVL SI, R9
SHRL $0x10, R9
MOVB $0xf8, (CX)
MOVW SI, 1(CX)
MOVB R9, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm64K
three_bytes_match_emit_repeat_encodeBetterBlockAsm64K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm64K
two_bytes_match_emit_repeat_encodeBetterBlockAsm64K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm64K
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm64K
one_byte_match_emit_repeat_encodeBetterBlockAsm64K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm64K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm64K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm64K
memmove_midmatch_emit_repeat_encodeBetterBlockAsm64K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm64K
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm64K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm64K
memmove_long_match_emit_repeat_encodeBetterBlockAsm64K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm64Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm64K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm64K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm64K
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm64K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_three_match_nolit_repeat_encodeBetterBlockAsm64K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_two_match_nolit_repeat_encodeBetterBlockAsm64K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm64K
repeat_one_match_nolit_repeat_encodeBetterBlockAsm64K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm64K:
// A copy/repeat has just been written. AX is the position after the match and
// CX the output pointer. Stop searching once AX reaches the limit in 8(SP).
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm64K
// Output pointer at or beyond the limit in (SP): give up and return 0.
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm64K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm64K:
// Index positions inside the match that was just emitted.
// SI = tmp (table base), DI and R8 = the two hash multipliers.
MOVQ tmp+48(FP), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ $0x9e3779b1, R8
// BX is advanced by one; R9 = AX-2. Eight bytes are loaded at BX, BX+1,
// R9 and R9+1 (all relative to DX).
// NOTE(review): BX is presumably the match start; it is set outside this block.
LEAQ 1(BX), BX
LEAQ -2(AX), R9
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
// Hash A: keep the low 6 bytes (shift left 16), multiply, take the top 15 bits.
SHLQ $0x10, R10
IMULQ DI, R10
SHRQ $0x31, R10
// Hash B: keep the low 4 bytes (shift left 32), multiply, take the top 12 bits.
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
SHLQ $0x10, R12
IMULQ DI, R12
SHRQ $0x31, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x34, R13
// R8 = BX+1 and R14 = R9+1 are the positions belonging to the hash-B loads.
LEAQ 1(BX), R8
LEAQ 1(R9), R14
// Table entries are 16 bits wide. Hash-A entries live at tmp+0,
// hash-B entries at tmp+65536.
MOVW BX, (SI)(R10*2)
MOVW R9, (SI)(R12*2)
// R10 is reused as the midpoint (BX + R9 + 1) / 2; then BX moves forward by
// one and R9 back by one before the sparse indexing loop.
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVW R8, 65536(SI)(R11*2)
MOVW R14, 65536(SI)(R13*2)
index_loop_encodeBetterBlockAsm64K:
// While R10 < R9: hash-A index the bytes at BX and at R10, then step both
// positions by two. When R10 reaches R9 the search loop resumes.
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm64K
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x31, R8
SHLQ $0x10, R11
IMULQ DI, R11
SHRQ $0x31, R11
MOVW BX, (SI)(R8*2)
// NOTE(review): R11 is the hash of the bytes at R10, but the value stored is
// R9 (the loop bound), not R10. Confirm against the generator / Go reference
// whether R10 was intended.
MOVW R9, (SI)(R11*2)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm64K
emit_remainder_encodeBetterBlockAsm64K:
// Flush whatever is left after the last emitted position in 12(SP).
// AX = CX + (src_len - 12(SP)) + 4. If that reaches the output limit in (SP)
// the function returns 0.
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 4(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm64K
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm64K:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
// Nothing to do when 12(SP) already equals src_len. Otherwise SI = number of
// remaining bytes, AX = DX + 12(SP) (address of the first one), and 12(SP) is
// advanced to src_len.
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm64K
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
// DX is reused from here on as count-1; a value below 29 fits in one tag byte.
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm64K
// DX = count-1-29. A value below 256 uses tag 0xe8 plus one length byte.
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm64K
// NOTE(review): same flags as the JB above, so this branch is never taken when
// reached; values >= 256 always use the 4-byte header that follows. No other
// jump to the three_bytes_ label is visible in this chunk. Confirm against the
// generator whether a compare with $0x00010000 was intended here.
JB three_bytes_emit_remainder_encodeBetterBlockAsm64K
// 4-byte header: tag 0xf8, low 16 bits of DX, then bits 16..23 (BL).
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB BL, 3(CX)
ADDQ $0x04, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm64K
three_bytes_emit_remainder_encodeBetterBlockAsm64K:
// 3-byte header: tag 0xf0 followed by a 16-bit length value.
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm64K
two_bytes_emit_remainder_encodeBetterBlockAsm64K:
// 2-byte header: tag 0xe8 followed by an 8-bit length value.
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
// DX is count-1 again; counts of at most 64 take the short SSE copy.
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm64K
JMP memmove_long_emit_remainder_encodeBetterBlockAsm64K
one_byte_emit_remainder_encodeBetterBlockAsm64K:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm64K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm64K
memmove_midemit_remainder_encodeBetterBlockAsm64K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm64K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm64K
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm64K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm64K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm64K
memmove_long_emit_remainder_encodeBetterBlockAsm64K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm64K:
// Return the number of bytes produced: output pointer (CX) minus dst base.
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm16K(dst []byte, src []byte, tmp *[36864]byte) int
// Requires: BMI, CMOV, SSE2
//
// Stack slots, as used by the visible code of this function:
//   (SP)    output limit; the function returns 0 when the output pointer would reach it
//   8(SP)   input position limit (src_len-8); reaching it ends the search loop
//   12(SP)  input position up to which output has already been emitted
//   16(SP)  offset of the most recent match, used for the repeat check
//   20(SP)  next input position to try when the current one yields no match
// Registers after this prologue: AX = current input position, CX = output
// pointer, DX = src base.
TEXT ·encodeBetterBlockAsm16K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
// Zero the whole tmp table: 0x120 iterations of 128 bytes = 36864 bytes.
MOVQ $0x00000120, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm16K:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm16K
// Nothing emitted yet.
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
// 8(SP) = src_len-8; (SP) = dst_base + (src_len - 11 - src_len/32).
LEAQ -11(AX), DX
LEAQ -8(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
// Start searching at position 1 with an initial offset of 1 in 16(SP).
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm16K:
// One probe of the match search at input position AX. BX = tmp (table base).
MOVQ tmp+48(FP), BX
// SI = AX + 1 + ((AX - 12(SP)) >> 6): the next position to try. The step
// grows with the distance since the last emitted position.
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 1(AX)(SI*1), SI
// Next position at or past the limit in 8(SP): flush the remainder.
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm16K
// DI = the 8 bytes at the current position; remember the next position.
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = 14-bit hash of the low 6 bytes of DI.
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
// R11 = 11-bit hash of the low 4 bytes of DI.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x35, R11
// Read the 16-bit candidate positions (SI from the table at tmp+0, R8 from the
// table at tmp+32768), then store the current position into both slots.
MOVWLZX (BX)(R10*2), SI
MOVWLZX 32768(BX)(R11*2), R8
MOVW AX, (BX)(R10*2)
MOVW AX, 32768(BX)(R11*2)
// Full 8-byte equality with the first candidate is an immediate match.
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm16K
// R11 = 8 bytes at the second candidate, kept for the 32-bit compare done at
// no_repeat_found. The flags of this CMPQ are overwritten by the SUBL below
// before any branch reads them.
MOVQ (DX)(R8*1), R11
CMPQ R11, DI
// Repeat check: XOR the 8 bytes at AX - 16(SP) with DI and test bytes 1..4
// (mask 0x000000ffffffff00). Any difference means no repeat at AX+1.
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm16K
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm16K
repeat_extend_back_loop_encodeBetterBlockAsm16K:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm16K
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm16K
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm16K
repeat_extend_back_end_encodeBetterBlockAsm16K:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 3(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm16K
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm16K:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm16K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm16K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm16K
JB three_bytes_repeat_emit_encodeBetterBlockAsm16K
three_bytes_repeat_emit_encodeBetterBlockAsm16K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm16K
two_bytes_repeat_emit_encodeBetterBlockAsm16K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm16K
JMP memmove_long_repeat_emit_encodeBetterBlockAsm16K
one_byte_repeat_emit_encodeBetterBlockAsm16K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm16K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm16K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm16K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm16K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm16K
memmove_midrepeat_emit_encodeBetterBlockAsm16K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm16K
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm16K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm16K
memmove_long_repeat_emit_encodeBetterBlockAsm16K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm16Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm16K:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm16K
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm16K:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm16K
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm16K
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm16K:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm16K
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm16K
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm16K
matchlen_match8_repeat_extend_encodeBetterBlockAsm16K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm16K
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm16K
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm16K
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm16K
matchlen_match4_repeat_extend_encodeBetterBlockAsm16K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm16K
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm16K
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm16K:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm16K
JB repeat_extend_forward_end_encodeBetterBlockAsm16K
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm16K
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm16K
matchlen_match1_repeat_extend_encodeBetterBlockAsm16K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm16K
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm16K:
// R10 = number of additional bytes matched by matchLen. Advance AX past them
// and compute SI = AX - BX, the total length passed to emitRepeat.
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
// The value loaded into BX here, and the one computed by the LEAL -1(SI) just
// below, are both overwritten on every path before BX is read.
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
// Length <= 29: a single tag byte.
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm16K
// Longer repeats store length-30 after the tag byte.
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm16K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm16K
// Tag 0xfc plus a 24-bit value: MOVL writes four bytes at 1(CX), but CX only
// advances by 4 in total, so the top byte is not part of the output.
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm16K
repeat_three_match_repeat_encodeBetterBlockAsm16K:
// Tag 0xf4 plus a 16-bit value.
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm16K
repeat_two_match_repeat_encodeBetterBlockAsm16K:
// Tag 0xec plus an 8-bit value.
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm16K
repeat_one_match_repeat_encodeBetterBlockAsm16K:
// Single byte: SI*8 - 4, i.e. ((length-1) << 3) | 4.
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm16K:
// Everything up to AX has now been emitted; resume searching from AX.
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm16K
no_repeat_found_encodeBetterBlockAsm16K:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm16K
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm16K
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm16K
candidateS_match_encodeBetterBlockAsm16K:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVWLZX (BX)(R10*2), SI
INCL AX
MOVW AX, (BX)(R10*2)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm16K
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm16K:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm16K
match_extend_back_loop_encodeBetterBlockAsm16K:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm16K
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm16K
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm16K
JMP match_extend_back_loop_encodeBetterBlockAsm16K
match_extend_back_end_encodeBetterBlockAsm16K:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 3(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm16K
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm16K:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm16K
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm16K:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm16K
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm16K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm16K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm16K
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm16K
matchlen_bsf_16match_nolit_encodeBetterBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm16K
matchlen_match8_match_nolit_encodeBetterBlockAsm16K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm16K
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm16K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm16K
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm16K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm16K
matchlen_match4_match_nolit_encodeBetterBlockAsm16K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm16K
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm16K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm16K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm16K
JB match_nolit_end_encodeBetterBlockAsm16K
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm16K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm16K
matchlen_match1_match_nolit_encodeBetterBlockAsm16K:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm16K
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm16K:
MOVL AX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm16K
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm16K
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm16K
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm16K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm16K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm16K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm16K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm16K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm16K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm16K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
match_emit_lits_encodeBetterBlockAsm16K:
LEAQ (DX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm16K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm16K
JB three_bytes_match_emit_encodeBetterBlockAsm16K
three_bytes_match_emit_encodeBetterBlockAsm16K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm16K
two_bytes_match_emit_encodeBetterBlockAsm16K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm16K
JMP memmove_long_match_emit_encodeBetterBlockAsm16K
one_byte_match_emit_encodeBetterBlockAsm16K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 1
CMPQ SI, $0x08
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_8
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_8:
MOVQ (R8), R10
MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_8through16:
MOVQ (R8), R10
MOVQ -8(R8)(SI*1), R8
MOVQ R10, (CX)
MOVQ R8, -8(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm16K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm16K
memmove_midmatch_emit_encodeBetterBlockAsm16K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm16K
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm16K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm16K
memmove_long_match_emit_encodeBetterBlockAsm16K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm16Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm16K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm16K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm16K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
emit_one_longer_match_nolit_encodeBetterBlockAsm16K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm16K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm16K:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm16K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
two_byte_match_nolit_encodeBetterBlockAsm16K:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm16K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm16K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm16K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
emit_copy2_2_match_nolit_encodeBetterBlockAsm16K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
emit_copy2_1_match_nolit_encodeBetterBlockAsm16K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
emit_copy2_0_match_nolit_encodeBetterBlockAsm16K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm16K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm16K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm16K
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm16K
three_bytes_match_emit_repeat_encodeBetterBlockAsm16K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm16K
two_bytes_match_emit_repeat_encodeBetterBlockAsm16K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm16K
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm16K
one_byte_match_emit_repeat_encodeBetterBlockAsm16K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm16K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm16K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm16K
memmove_midmatch_emit_repeat_encodeBetterBlockAsm16K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm16K
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm16K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm16K
memmove_long_match_emit_repeat_encodeBetterBlockAsm16K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm16Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm16K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm16K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm16K
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm16K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_three_match_nolit_repeat_encodeBetterBlockAsm16K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_two_match_nolit_repeat_encodeBetterBlockAsm16K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm16K
repeat_one_match_nolit_repeat_encodeBetterBlockAsm16K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm16K:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm16K
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm16K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm16K:
MOVQ tmp+48(FP), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ $0x9e3779b1, R8
LEAQ 1(BX), BX
LEAQ -2(AX), R9
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ DI, R10
SHRQ $0x32, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x35, R11
SHLQ $0x10, R12
IMULQ DI, R12
SHRQ $0x32, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x35, R13
LEAQ 1(BX), R8
LEAQ 1(R9), R14
MOVW BX, (SI)(R10*2)
MOVW R9, (SI)(R12*2)
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVW R8, 32768(SI)(R11*2)
MOVW R14, 32768(SI)(R13*2)
index_loop_encodeBetterBlockAsm16K:
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm16K
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x32, R8
SHLQ $0x10, R11
IMULQ DI, R11
SHRQ $0x32, R11
MOVW BX, (SI)(R8*2)
MOVW R9, (SI)(R11*2)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm16K
emit_remainder_encodeBetterBlockAsm16K:
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 3(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm16K
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm16K:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm16K
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm16K
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm16K
JB three_bytes_emit_remainder_encodeBetterBlockAsm16K
three_bytes_emit_remainder_encodeBetterBlockAsm16K:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm16K
two_bytes_emit_remainder_encodeBetterBlockAsm16K:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm16K
JMP memmove_long_emit_remainder_encodeBetterBlockAsm16K
one_byte_emit_remainder_encodeBetterBlockAsm16K:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm16K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm16K
memmove_midemit_remainder_encodeBetterBlockAsm16K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm16K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm16K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm16K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm16K
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm16K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm16K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm16K
memmove_long_emit_remainder_encodeBetterBlockAsm16K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm16Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm16Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm16Klarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm16Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm16K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm4K(dst []byte, src []byte, tmp *[10240]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBetterBlockAsm4K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00000050, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm4K:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm4K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
LEAQ -11(AX), DX
LEAQ -8(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm4K:
MOVQ tmp+48(FP), BX
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 1(AX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm4K
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x36, R11
MOVWLZX (BX)(R10*2), SI
MOVWLZX 8192(BX)(R11*2), R8
MOVW AX, (BX)(R10*2)
MOVW AX, 8192(BX)(R11*2)
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm4K
MOVQ (DX)(R8*1), R11
CMPQ R11, DI
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm4K
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm4K
repeat_extend_back_loop_encodeBetterBlockAsm4K:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm4K
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm4K
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm4K
repeat_extend_back_end_encodeBetterBlockAsm4K:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 3(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm4K
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm4K:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm4K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm4K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm4K
JB three_bytes_repeat_emit_encodeBetterBlockAsm4K
three_bytes_repeat_emit_encodeBetterBlockAsm4K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm4K
two_bytes_repeat_emit_encodeBetterBlockAsm4K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm4K
JMP memmove_long_repeat_emit_encodeBetterBlockAsm4K
one_byte_repeat_emit_encodeBetterBlockAsm4K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm4K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm4K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm4K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm4K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm4K
memmove_midrepeat_emit_encodeBetterBlockAsm4K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm4K
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm4K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm4K
memmove_long_repeat_emit_encodeBetterBlockAsm4K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm4Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm4K:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm4K
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm4K:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm4K
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm4K
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm4K:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm4K
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm4K
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm4K
matchlen_match8_repeat_extend_encodeBetterBlockAsm4K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm4K
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm4K
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm4K
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm4K
matchlen_match4_repeat_extend_encodeBetterBlockAsm4K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm4K
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm4K
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm4K:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm4K
JB repeat_extend_forward_end_encodeBetterBlockAsm4K
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm4K
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm4K
matchlen_match1_repeat_extend_encodeBetterBlockAsm4K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm4K
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm4K:
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm4K
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm4K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm4K
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm4K
repeat_three_match_repeat_encodeBetterBlockAsm4K:
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm4K
repeat_two_match_repeat_encodeBetterBlockAsm4K:
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm4K
repeat_one_match_repeat_encodeBetterBlockAsm4K:
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm4K:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm4K
no_repeat_found_encodeBetterBlockAsm4K:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm4K
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm4K
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm4K
candidateS_match_encodeBetterBlockAsm4K:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
MOVWLZX (BX)(R10*2), SI
INCL AX
MOVW AX, (BX)(R10*2)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm4K
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm4K:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm4K
match_extend_back_loop_encodeBetterBlockAsm4K:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm4K
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm4K
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm4K
JMP match_extend_back_loop_encodeBetterBlockAsm4K
match_extend_back_end_encodeBetterBlockAsm4K:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 3(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm4K
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm4K:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm4K
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4K:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4K
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm4K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4K
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm4K
matchlen_bsf_16match_nolit_encodeBetterBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm4K
matchlen_match8_match_nolit_encodeBetterBlockAsm4K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm4K
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4K
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm4K
matchlen_match4_match_nolit_encodeBetterBlockAsm4K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm4K
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm4K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm4K
JB match_nolit_end_encodeBetterBlockAsm4K
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm4K
matchlen_match1_match_nolit_encodeBetterBlockAsm4K:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm4K
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm4K:
MOVL AX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm4K
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm4K
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm4K
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm4K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm4K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm4K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm4K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm4K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm4K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
match_emit_lits_encodeBetterBlockAsm4K:
LEAQ (DX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm4K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm4K
JB three_bytes_match_emit_encodeBetterBlockAsm4K
three_bytes_match_emit_encodeBetterBlockAsm4K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm4K
two_bytes_match_emit_encodeBetterBlockAsm4K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm4K
JMP memmove_long_match_emit_encodeBetterBlockAsm4K
one_byte_match_emit_encodeBetterBlockAsm4K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 1
CMPQ SI, $0x08
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_8
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_8:
MOVQ (R8), R10
MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_8through16:
MOVQ (R8), R10
MOVQ -8(R8)(SI*1), R8
MOVQ R10, (CX)
MOVQ R8, -8(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm4K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm4K
memmove_midmatch_emit_encodeBetterBlockAsm4K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm4K
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm4K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm4K
memmove_long_match_emit_encodeBetterBlockAsm4K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm4K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm4K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm4K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
emit_one_longer_match_nolit_encodeBetterBlockAsm4K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm4K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm4K:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm4K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
two_byte_match_nolit_encodeBetterBlockAsm4K:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm4K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm4K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm4K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
emit_copy2_2_match_nolit_encodeBetterBlockAsm4K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
emit_copy2_1_match_nolit_encodeBetterBlockAsm4K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
emit_copy2_0_match_nolit_encodeBetterBlockAsm4K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm4K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4K
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4K
three_bytes_match_emit_repeat_encodeBetterBlockAsm4K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4K
two_bytes_match_emit_repeat_encodeBetterBlockAsm4K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm4K
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4K
one_byte_match_emit_repeat_encodeBetterBlockAsm4K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4K
memmove_midmatch_emit_repeat_encodeBetterBlockAsm4K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm4K
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm4K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4K
memmove_long_match_emit_repeat_encodeBetterBlockAsm4K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm4K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4K
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_three_match_nolit_repeat_encodeBetterBlockAsm4K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_two_match_nolit_repeat_encodeBetterBlockAsm4K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4K
repeat_one_match_nolit_repeat_encodeBetterBlockAsm4K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm4K:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm4K
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm4K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm4K:
MOVQ tmp+48(FP), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ $0x9e3779b1, R8
LEAQ 1(BX), BX
LEAQ -2(AX), R9
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ DI, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
SHLQ $0x10, R12
IMULQ DI, R12
SHRQ $0x34, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x36, R13
LEAQ 1(BX), R8
LEAQ 1(R9), R14
MOVW BX, (SI)(R10*2)
MOVW R9, (SI)(R12*2)
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVW R8, 8192(SI)(R11*2)
MOVW R14, 8192(SI)(R13*2)
index_loop_encodeBetterBlockAsm4K:
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm4K
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x34, R8
SHLQ $0x10, R11
IMULQ DI, R11
SHRQ $0x34, R11
MOVW BX, (SI)(R8*2)
MOVW R9, (SI)(R11*2)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm4K
emit_remainder_encodeBetterBlockAsm4K:
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 3(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm4K
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm4K:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4K
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm4K
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm4K
JB three_bytes_emit_remainder_encodeBetterBlockAsm4K
three_bytes_emit_remainder_encodeBetterBlockAsm4K:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4K
two_bytes_emit_remainder_encodeBetterBlockAsm4K:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm4K
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4K
one_byte_emit_remainder_encodeBetterBlockAsm4K:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm4K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4K
memmove_midemit_remainder_encodeBetterBlockAsm4K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm4K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm4K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm4K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm4K
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm4K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm4K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4K
memmove_long_emit_remainder_encodeBetterBlockAsm4K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4Klarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm4K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func encodeBetterBlockAsm1K(dst []byte, src []byte, tmp *[4608]byte) int
// Requires: BMI, CMOV, SSE2
TEXT ·encodeBetterBlockAsm1K(SB), $24-64
MOVQ tmp+48(FP), AX
MOVQ dst_base+0(FP), CX
MOVQ $0x00000024, DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm1K:
MOVOU X0, (AX)
MOVOU X0, 16(AX)
MOVOU X0, 32(AX)
MOVOU X0, 48(AX)
MOVOU X0, 64(AX)
MOVOU X0, 80(AX)
MOVOU X0, 96(AX)
MOVOU X0, 112(AX)
ADDQ $0x80, AX
DECQ DX
JNZ zero_loop_encodeBetterBlockAsm1K
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), AX
LEAQ -11(AX), DX
LEAQ -8(AX), BX
MOVL BX, 8(SP)
SHRQ $0x05, AX
SUBL AX, DX
LEAQ (CX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, AX
MOVL AX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm1K:
MOVQ tmp+48(FP), BX
MOVL AX, SI
SUBL 12(SP), SI
SHRL $0x04, SI
LEAL 1(AX)(SI*1), SI
CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm1K
MOVQ (DX)(AX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x35, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x38, R11
MOVWLZX (BX)(R10*2), SI
MOVWLZX 4096(BX)(R11*2), R8
MOVW AX, (BX)(R10*2)
MOVW AX, 4096(BX)(R11*2)
MOVQ (DX)(SI*1), R10
CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm1K
MOVQ (DX)(R8*1), R11
CMPQ R11, DI
MOVL AX, R12
SUBL 16(SP), R12
MOVQ (DX)(R12*1), R12
MOVQ $0x000000ffffffff00, R13
XORQ DI, R12
TESTQ R13, R12
JNE no_repeat_found_encodeBetterBlockAsm1K
LEAL 1(AX), BX
MOVL 12(SP), SI
MOVL BX, DI
SUBL 16(SP), DI
JZ repeat_extend_back_end_encodeBetterBlockAsm1K
repeat_extend_back_loop_encodeBetterBlockAsm1K:
CMPL BX, SI
JBE repeat_extend_back_end_encodeBetterBlockAsm1K
MOVB -1(DX)(DI*1), R8
MOVB -1(DX)(BX*1), R9
CMPB R8, R9
JNE repeat_extend_back_end_encodeBetterBlockAsm1K
LEAL -1(BX), BX
DECL DI
JNZ repeat_extend_back_loop_encodeBetterBlockAsm1K
repeat_extend_back_end_encodeBetterBlockAsm1K:
MOVL BX, SI
SUBL 12(SP), SI
LEAQ 3(CX)(SI*1), SI
CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBetterBlockAsm1K
MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBetterBlockAsm1K:
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_repeat_emit_encodeBetterBlockAsm1K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_repeat_emit_encodeBetterBlockAsm1K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBetterBlockAsm1K
JB three_bytes_repeat_emit_encodeBetterBlockAsm1K
three_bytes_repeat_emit_encodeBetterBlockAsm1K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_repeat_emit_encodeBetterBlockAsm1K
two_bytes_repeat_emit_encodeBetterBlockAsm1K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midrepeat_emit_encodeBetterBlockAsm1K
JMP memmove_long_repeat_emit_encodeBetterBlockAsm1K
one_byte_repeat_emit_encodeBetterBlockAsm1K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm1K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm1K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_repeat_emit_encodeBetterBlockAsm1K
emit_lit_memmove_repeat_emit_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_repeat_emit_encodeBetterBlockAsm1K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm1K
memmove_midrepeat_emit_encodeBetterBlockAsm1K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm1K
emit_lit_memmove_mid_repeat_emit_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_repeat_emit_encodeBetterBlockAsm1K:
MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBetterBlockAsm1K
memmove_long_repeat_emit_encodeBetterBlockAsm1K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R11
SUBQ R9, R11
DECQ R10
JA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), R9
LEAQ -32(CX)(R11*1), R12
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm1Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, R9
ADDQ $0x20, R11
DECQ R10
JNA emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(CX)(R11*1)
MOVOA X5, -16(CX)(R11*1)
ADDQ $0x20, R11
CMPQ DI, R11
JAE emit_lit_memmove_long_repeat_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBetterBlockAsm1K:
ADDL $0x05, AX
MOVL AX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
JMP matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm1K
matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm1K:
MOVQ (R8)(R10*1), R9
MOVQ 8(R8)(R10*1), R11
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm1K
XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16repeat_extend_encodeBetterBlockAsm1K
LEAL -16(DI), DI
LEAL 16(R10), R10
matchlen_loop_16_entry_repeat_extend_encodeBetterBlockAsm1K:
CMPL DI, $0x10
JAE matchlen_loopback_16_repeat_extend_encodeBetterBlockAsm1K
JMP matchlen_match8_repeat_extend_encodeBetterBlockAsm1K
matchlen_bsf_16repeat_extend_encodeBetterBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R11, R11
#else
BSFQ R11, R11
#endif
SARQ $0x03, R11
LEAL 8(R10)(R11*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm1K
matchlen_match8_repeat_extend_encodeBetterBlockAsm1K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeBetterBlockAsm1K
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm1K
LEAL -8(DI), DI
LEAL 8(R10), R10
JMP matchlen_match4_repeat_extend_encodeBetterBlockAsm1K
matchlen_bsf_8_repeat_extend_encodeBetterBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
#else
BSFQ R9, R9
#endif
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeBetterBlockAsm1K
matchlen_match4_repeat_extend_encodeBetterBlockAsm1K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeBetterBlockAsm1K
MOVL (R8)(R10*1), R9
CMPL (SI)(R10*1), R9
JNE matchlen_match2_repeat_extend_encodeBetterBlockAsm1K
LEAL -4(DI), DI
LEAL 4(R10), R10
matchlen_match2_repeat_extend_encodeBetterBlockAsm1K:
CMPL DI, $0x01
JE matchlen_match1_repeat_extend_encodeBetterBlockAsm1K
JB repeat_extend_forward_end_encodeBetterBlockAsm1K
MOVW (R8)(R10*1), R9
CMPW (SI)(R10*1), R9
JNE matchlen_match1_repeat_extend_encodeBetterBlockAsm1K
LEAL 2(R10), R10
SUBL $0x02, DI
JZ repeat_extend_forward_end_encodeBetterBlockAsm1K
matchlen_match1_repeat_extend_encodeBetterBlockAsm1K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE repeat_extend_forward_end_encodeBetterBlockAsm1K
LEAL 1(R10), R10
repeat_extend_forward_end_encodeBetterBlockAsm1K:
ADDL R10, AX
MOVL AX, SI
SUBL BX, SI
MOVL 16(SP), BX
// emitRepeat
LEAL -1(SI), BX
CMPL SI, $0x1d
JBE repeat_one_match_repeat_encodeBetterBlockAsm1K
LEAL -30(SI), BX
CMPL SI, $0x0000011e
JB repeat_two_match_repeat_encodeBetterBlockAsm1K
CMPL SI, $0x0001001e
JB repeat_three_match_repeat_encodeBetterBlockAsm1K
MOVB $0xfc, (CX)
MOVL BX, 1(CX)
ADDQ $0x04, CX
JMP repeat_end_emit_encodeBetterBlockAsm1K
repeat_three_match_repeat_encodeBetterBlockAsm1K:
MOVB $0xf4, (CX)
MOVW BX, 1(CX)
ADDQ $0x03, CX
JMP repeat_end_emit_encodeBetterBlockAsm1K
repeat_two_match_repeat_encodeBetterBlockAsm1K:
MOVB $0xec, (CX)
MOVB BL, 1(CX)
ADDQ $0x02, CX
JMP repeat_end_emit_encodeBetterBlockAsm1K
repeat_one_match_repeat_encodeBetterBlockAsm1K:
XORL BX, BX
LEAL -4(BX)(SI*8), BX
MOVB BL, (CX)
ADDQ $0x01, CX
repeat_end_emit_encodeBetterBlockAsm1K:
MOVL AX, 12(SP)
JMP search_loop_encodeBetterBlockAsm1K
no_repeat_found_encodeBetterBlockAsm1K:
CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm1K
CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm1K
MOVL 20(SP), AX
JMP search_loop_encodeBetterBlockAsm1K
candidateS_match_encodeBetterBlockAsm1K:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x35, R10
MOVWLZX (BX)(R10*2), SI
INCL AX
MOVW AX, (BX)(R10*2)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm1K
DECL AX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm1K:
MOVL 12(SP), BX
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm1K
match_extend_back_loop_encodeBetterBlockAsm1K:
CMPL AX, BX
JBE match_extend_back_end_encodeBetterBlockAsm1K
MOVB -1(DX)(SI*1), DI
MOVB -1(DX)(AX*1), R8
CMPB DI, R8
JNE match_extend_back_end_encodeBetterBlockAsm1K
LEAL -1(AX), AX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm1K
JMP match_extend_back_loop_encodeBetterBlockAsm1K
match_extend_back_end_encodeBetterBlockAsm1K:
MOVL AX, BX
SUBL 12(SP), BX
LEAQ 3(CX)(BX*1), BX
CMPQ BX, (SP)
JB match_dst_size_check_encodeBetterBlockAsm1K
MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm1K:
MOVL AX, BX
ADDL $0x04, AX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL AX, DI
LEAQ (DX)(AX*1), R8
LEAQ (DX)(SI*1), R9
// matchLen
XORL R11, R11
JMP matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm1K
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm1K:
MOVQ (R8)(R11*1), R10
MOVQ 8(R8)(R11*1), R12
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm1K
XORQ 8(R9)(R11*1), R12
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm1K
LEAL -16(DI), DI
LEAL 16(R11), R11
matchlen_loop_16_entry_match_nolit_encodeBetterBlockAsm1K:
CMPL DI, $0x10
JAE matchlen_loopback_16_match_nolit_encodeBetterBlockAsm1K
JMP matchlen_match8_match_nolit_encodeBetterBlockAsm1K
matchlen_bsf_16match_nolit_encodeBetterBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R12, R12
#else
BSFQ R12, R12
#endif
SARQ $0x03, R12
LEAL 8(R11)(R12*1), R11
JMP match_nolit_end_encodeBetterBlockAsm1K
matchlen_match8_match_nolit_encodeBetterBlockAsm1K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm1K
MOVQ (R8)(R11*1), R10
XORQ (R9)(R11*1), R10
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm1K
LEAL -8(DI), DI
LEAL 8(R11), R11
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm1K
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm1K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
#else
BSFQ R10, R10
#endif
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm1K
matchlen_match4_match_nolit_encodeBetterBlockAsm1K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm1K
MOVL (R8)(R11*1), R10
CMPL (R9)(R11*1), R10
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm1K
LEAL -4(DI), DI
LEAL 4(R11), R11
matchlen_match2_match_nolit_encodeBetterBlockAsm1K:
CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm1K
JB match_nolit_end_encodeBetterBlockAsm1K
MOVW (R8)(R11*1), R10
CMPW (R9)(R11*1), R10
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm1K
LEAL 2(R11), R11
SUBL $0x02, DI
JZ match_nolit_end_encodeBetterBlockAsm1K
matchlen_match1_match_nolit_encodeBetterBlockAsm1K:
MOVB (R8)(R11*1), R10
CMPB (R9)(R11*1), R10
JNE match_nolit_end_encodeBetterBlockAsm1K
LEAL 1(R11), R11
match_nolit_end_encodeBetterBlockAsm1K:
MOVL AX, DI
SUBL SI, DI
MOVL DI, 16(SP)
// Check if we can combine lit+copy
MOVLQZX 12(SP), R8
MOVL BX, SI
SUBL R8, SI
JZ match_emit_nolits_encodeBetterBlockAsm1K
CMPL DI, $0x00000040
JL match_emit_lits_encodeBetterBlockAsm1K
CMPL SI, $0x04
JA match_emit_lits_encodeBetterBlockAsm1K
MOVL (DX)(R8*1), R8
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy2WithLits
XORQ R9, R9
SUBL $0x40, DI
LEAL -11(R11), R10
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x07
CMOVLGE R10, R9
MOVQ $0x00000007, DI
CMOVLLT R11, DI
LEAL -1(SI)(DI*4), DI
MOVL $0x00000003, R10
LEAL (R10)(DI*8), DI
MOVB DI, (CX)
ADDQ $0x03, CX
MOVL R8, (CX)
ADDQ SI, CX
TESTL R9, R9
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm1K
// emitRepeat
LEAL -1(R9), SI
CMPL R9, $0x1d
JBE repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm1K
LEAL -30(R9), SI
CMPL R9, $0x0000011e
JB repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm1K
CMPL R9, $0x0001001e
JB repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm1K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_three_match_emit_repeat_copy2_encodeBetterBlockAsm1K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_two_match_emit_repeat_copy2_encodeBetterBlockAsm1K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_one_match_emit_repeat_copy2_encodeBetterBlockAsm1K:
XORL SI, SI
LEAL -4(SI)(R9*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
match_emit_lits_encodeBetterBlockAsm1K:
LEAQ (DX)(R8*1), R8
// emitLiteral
LEAL -1(SI), R9
CMPL R9, $0x1d
JB one_byte_match_emit_encodeBetterBlockAsm1K
SUBL $0x1d, R9
CMPL R9, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm1K
JB three_bytes_match_emit_encodeBetterBlockAsm1K
three_bytes_match_emit_encodeBetterBlockAsm1K:
MOVB $0xf0, (CX)
MOVW R9, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, R9
JMP memmove_long_match_emit_encodeBetterBlockAsm1K
two_bytes_match_emit_encodeBetterBlockAsm1K:
MOVB $0xe8, (CX)
MOVB R9, 1(CX)
ADDL $0x1d, R9
ADDQ $0x02, CX
CMPL R9, $0x40
JB memmove_midmatch_emit_encodeBetterBlockAsm1K
JMP memmove_long_match_emit_encodeBetterBlockAsm1K
one_byte_match_emit_encodeBetterBlockAsm1K:
SHLB $0x03, R9
MOVB R9, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 1
CMPQ SI, $0x08
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_8
CMPQ SI, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_8through16
CMPQ SI, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_8:
MOVQ (R8), R10
MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_8through16:
MOVQ (R8), R10
MOVQ -8(R8)(SI*1), R8
MOVQ R10, (CX)
MOVQ R8, -8(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm1K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm1K
memmove_midmatch_emit_encodeBetterBlockAsm1K:
LEAQ (CX)(SI*1), R9
// genMemMoveShort
// margin: 8, min move: 30
CMPQ SI, $0x20
JBE emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(SI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(SI*1)
JMP memmove_mid_end_copy_match_emit_encodeBetterBlockAsm1K
emit_lit_memmove_mid_match_emit_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
memmove_mid_end_copy_match_emit_encodeBetterBlockAsm1K:
MOVQ R9, CX
JMP match_emit_nolits_encodeBetterBlockAsm1K
memmove_long_match_emit_encodeBetterBlockAsm1K:
LEAQ (CX)(SI*1), R9
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(SI*1), X2
MOVOU -16(R8)(SI*1), X3
MOVQ SI, R12
SHRQ $0x05, R12
MOVQ CX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R12
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm1Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(CX)(R13*1)
MOVOA X5, -16(CX)(R13*1)
ADDQ $0x20, R13
CMPQ SI, R13
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(SI*1)
MOVOU X3, -16(CX)(SI*1)
MOVQ R9, CX
match_emit_nolits_encodeBetterBlockAsm1K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitCopy
CMPL DI, $0x00000400
JA two_byte_match_nolit_encodeBetterBlockAsm1K
CMPL R11, $0x00000013
JAE emit_one_longer_match_nolit_encodeBetterBlockAsm1K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL -15(SI)(R11*4), SI
MOVW SI, (CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
emit_one_longer_match_nolit_encodeBetterBlockAsm1K:
CMPL R11, $0x00000112
JAE emit_copy1_repeat_match_nolit_encodeBetterBlockAsm1K
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 61(SI), SI
MOVW SI, (CX)
LEAL -18(R11), SI
MOVB SI, 2(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
emit_copy1_repeat_match_nolit_encodeBetterBlockAsm1K:
LEAL -1(DI), SI
SHLL $0x06, SI
LEAL 57(SI), SI
MOVW SI, (CX)
ADDQ $0x02, CX
SUBL $0x12, R11
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K
CMPL R11, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_three_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_two_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_one_emit_copy1_do_repeat_match_nolit_encodeBetterBlockAsm1K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
two_byte_match_nolit_encodeBetterBlockAsm1K:
// emitCopy2
LEAL -64(DI), DI
LEAL -4(R11), R11
MOVW DI, 1(CX)
CMPL R11, $0x3c
JBE emit_copy2_0_match_nolit_encodeBetterBlockAsm1K_emit2
LEAL -60(R11), SI
CMPL R11, $0x0000013c
JB emit_copy2_1_match_nolit_encodeBetterBlockAsm1K_emit2
CMPL R11, $0x0001003c
JB emit_copy2_2_match_nolit_encodeBetterBlockAsm1K_emit2
MOVB $0xfe, (CX)
MOVL SI, 3(CX)
ADDQ $0x06, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
emit_copy2_2_match_nolit_encodeBetterBlockAsm1K_emit2:
MOVB $0xfa, (CX)
MOVW SI, 3(CX)
ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
emit_copy2_1_match_nolit_encodeBetterBlockAsm1K_emit2:
MOVB $0xf6, (CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
emit_copy2_0_match_nolit_encodeBetterBlockAsm1K_emit2:
MOVL $0x00000002, SI
LEAL (SI)(R11*4), SI
MOVB SI, (CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
// emitLiteralsDstP
MOVL 12(SP), SI
CMPL SI, BX
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm1K
MOVL BX, DI
MOVL BX, 12(SP)
LEAQ (DX)(SI*1), R8
SUBL SI, DI
// emitLiteral
LEAL -1(DI), SI
CMPL SI, $0x1d
JB one_byte_match_emit_repeat_encodeBetterBlockAsm1K
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm1K
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm1K
three_bytes_match_emit_repeat_encodeBetterBlockAsm1K:
MOVB $0xf0, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, SI
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm1K
two_bytes_match_emit_repeat_encodeBetterBlockAsm1K:
MOVB $0xe8, (CX)
MOVB SI, 1(CX)
ADDL $0x1d, SI
ADDQ $0x02, CX
CMPL SI, $0x40
JB memmove_midmatch_emit_repeat_encodeBetterBlockAsm1K
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm1K
one_byte_match_emit_repeat_encodeBetterBlockAsm1K:
SHLB $0x03, SI
MOVB SI, (CX)
ADDQ $0x01, CX
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 1
CMPQ DI, $0x08
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_8through16
CMPQ DI, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_8through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (CX)
MOVQ R8, -8(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm1K
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm1K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm1K
memmove_midmatch_emit_repeat_encodeBetterBlockAsm1K:
LEAQ (CX)(DI*1), SI
// genMemMoveShort
// margin: 8, min move: 30
CMPQ DI, $0x20
JBE emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(DI*1)
JMP memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm1K
emit_lit_memmove_mid_match_emit_repeat_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
memmove_mid_end_copy_match_emit_repeat_encodeBetterBlockAsm1K:
MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm1K
memmove_long_match_emit_repeat_encodeBetterBlockAsm1K:
LEAQ (CX)(DI*1), SI
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVQ DI, R10
SHRQ $0x05, R10
MOVQ CX, R9
ANDL $0x0000001f, R9
MOVQ $0x00000040, R12
SUBQ R9, R12
DECQ R10
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(R8)(R12*1), R9
LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm1Klarge_big_loop_back:
MOVOU (R9), X4
MOVOU 16(R9), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R9
ADDQ $0x20, R12
DECQ R10
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(R8)(R12*1), X4
MOVOU -16(R8)(R12*1), X5
MOVOA X4, -32(CX)(R12*1)
MOVOA X5, -16(CX)(R12*1)
ADDQ $0x20, R12
CMPQ DI, R12
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(DI*1)
MOVOU X3, -16(CX)(DI*1)
MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm1K:
ADDL R11, AX
ADDL $0x04, R11
MOVL AX, 12(SP)
// emitRepeat
LEAL -1(R11), SI
CMPL R11, $0x1d
JBE repeat_one_match_nolit_repeat_encodeBetterBlockAsm1K
LEAL -30(R11), SI
CMPL R11, $0x0000011e
JB repeat_two_match_nolit_repeat_encodeBetterBlockAsm1K
CMPL R11, $0x0001001e
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm1K
MOVB $0xfc, (CX)
MOVL SI, 1(CX)
ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_three_match_nolit_repeat_encodeBetterBlockAsm1K:
MOVB $0xf4, (CX)
MOVW SI, 1(CX)
ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_two_match_nolit_repeat_encodeBetterBlockAsm1K:
MOVB $0xec, (CX)
MOVB SI, 1(CX)
ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm1K
repeat_one_match_nolit_repeat_encodeBetterBlockAsm1K:
XORL SI, SI
LEAL -4(SI)(R11*8), SI
MOVB SI, (CX)
ADDQ $0x01, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm1K:
CMPL AX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm1K
CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm1K
MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm1K:
MOVQ tmp+48(FP), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ $0x9e3779b1, R8
LEAQ 1(BX), BX
LEAQ -2(AX), R9
MOVQ (DX)(BX*1), R10
MOVQ 1(DX)(BX*1), R11
MOVQ (DX)(R9*1), R12
MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ DI, R10
SHRQ $0x35, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x38, R11
SHLQ $0x10, R12
IMULQ DI, R12
SHRQ $0x35, R12
SHLQ $0x20, R13
IMULQ R8, R13
SHRQ $0x38, R13
LEAQ 1(BX), R8
LEAQ 1(R9), R14
MOVW BX, (SI)(R10*2)
MOVW R9, (SI)(R12*2)
LEAQ 1(R9)(BX*1), R10
SHRQ $0x01, R10
ADDQ $0x01, BX
SUBQ $0x01, R9
MOVW R8, 4096(SI)(R11*2)
MOVW R14, 4096(SI)(R13*2)
index_loop_encodeBetterBlockAsm1K:
CMPQ R10, R9
JAE search_loop_encodeBetterBlockAsm1K
MOVQ (DX)(BX*1), R8
MOVQ (DX)(R10*1), R11
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x35, R8
SHLQ $0x10, R11
IMULQ DI, R11
SHRQ $0x35, R11
MOVW BX, (SI)(R8*2)
MOVW R9, (SI)(R11*2)
ADDQ $0x02, BX
ADDQ $0x02, R10
JMP index_loop_encodeBetterBlockAsm1K
emit_remainder_encodeBetterBlockAsm1K:
MOVQ src_len+32(FP), AX
SUBL 12(SP), AX
LEAQ 3(CX)(AX*1), AX
CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm1K
MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm1K:
MOVQ src_len+32(FP), AX
// emitLiteralsDstP
MOVL 12(SP), BX
CMPL BX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm1K
MOVL AX, SI
MOVL AX, 12(SP)
LEAQ (DX)(BX*1), AX
SUBL BX, SI
// emitLiteral
LEAL -1(SI), DX
CMPL DX, $0x1d
JB one_byte_emit_remainder_encodeBetterBlockAsm1K
SUBL $0x1d, DX
CMPL DX, $0x00000100
JB two_bytes_emit_remainder_encodeBetterBlockAsm1K
JB three_bytes_emit_remainder_encodeBetterBlockAsm1K
three_bytes_emit_remainder_encodeBetterBlockAsm1K:
MOVB $0xf0, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
ADDL $0x1d, DX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm1K
two_bytes_emit_remainder_encodeBetterBlockAsm1K:
MOVB $0xe8, (CX)
MOVB DL, 1(CX)
ADDL $0x1d, DX
ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_midemit_remainder_encodeBetterBlockAsm1K
JMP memmove_long_emit_remainder_encodeBetterBlockAsm1K
one_byte_emit_remainder_encodeBetterBlockAsm1K:
SHLB $0x03, DL
MOVB DL, (CX)
ADDQ $0x01, CX
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -1, min move: 1
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_3
CMPQ BX, $0x08
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_4through8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_1or2:
MOVB (AX), SI
MOVB -1(AX)(BX*1), AL
MOVB SI, (CX)
MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_3:
MOVW (AX), SI
MOVB 2(AX), AL
MOVW SI, (CX)
MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_4through8:
MOVL (AX), SI
MOVL -4(AX)(BX*1), AX
MOVL SI, (CX)
MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_8through16:
MOVQ (AX), SI
MOVQ -8(AX)(BX*1), AX
MOVQ SI, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm1K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm1K
memmove_midemit_remainder_encodeBetterBlockAsm1K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// margin: -2, min move: 30
CMPQ BX, $0x20
JBE emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm1K_memmove_move_17through32
JMP emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm1K_memmove_move_33through64
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm1K_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm1K
emit_lit_memmove_mid_emit_remainder_encodeBetterBlockAsm1K_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
memmove_mid_end_copy_emit_remainder_encodeBetterBlockAsm1K:
MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm1K
memmove_long_emit_remainder_encodeBetterBlockAsm1K:
LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
LEAQ -32(AX)(R8*1), SI
LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm1Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm1Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm1Klarge_forward_sse_loop_32:
MOVOU -32(AX)(R8*1), X4
MOVOU -16(AX)(R8*1), X5
MOVOA X4, -32(CX)(R8*1)
MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm1Klarge_forward_sse_loop_32
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm1K:
MOVQ dst_base+0(FP), AX
SUBQ AX, CX
MOVQ CX, ret+56(FP)
RET
// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a literal header for len(lit) into dst, copies the
// literal bytes after it, and returns header size + len(lit). It returns 0
// and writes nothing when len(lit) == 0.
//
// Header forms, selected on n = len(lit)-1 (compared as 32-bit values):
//   n < 29            1 byte : n<<3
//   n-29 < 0x100      2 bytes: 0xe8, byte(n-29)
//   n-29 < 0x10000    3 bytes: 0xf0, uint16(n-29) little-endian
//   otherwise         4 bytes: 0xf8, 24 bits of (n-29) little-endian
//
// Register use:
//   AX = dst write cursor, CX = lit read pointer, DX = len(lit),
//   BX = running return value (starts at len(lit), header size is added).
//
// No bounds checks are performed here; the copy helpers use overlapping
// head/tail loads and stores, so dst must already be large enough.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
MOVQ lit_len+32(FP), DX
MOVQ dst_base+0(FP), AX
MOVQ lit_base+24(FP), CX
// Empty literal: nothing to emit, return 0.
TESTQ DX, DX
JZ emit_literal_end_standalone_skip
// emitLiteral
// BX = len(lit); SI = len(lit)-1 drives the header-size selection below.
MOVL DX, BX
LEAL -1(DX), SI
CMPL SI, $0x1d
JB one_byte_standalone
// SI = len(lit)-30 from here on.
SUBL $0x1d, SI
CMPL SI, $0x00000100
JB two_bytes_standalone
CMPL SI, $0x00010000
JB three_bytes_standalone
// Four-byte header: tag 0xf8, low 16 bits at dst[1:3], bits 16..23 at dst[3].
MOVL SI, DI
SHRL $0x10, DI
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB DI, 3(AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
// SI is restored to len(lit)-1 but is overwritten in the long copy before
// being read again.
ADDL $0x1d, SI
JMP memmove_long_standalone
three_bytes_standalone:
// Three-byte header: tag 0xf0 followed by a 16-bit length value.
MOVB $0xf0, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
ADDL $0x1d, SI
JMP memmove_long_standalone
two_bytes_standalone:
// Two-byte header: tag 0xe8 followed by an 8-bit length value.
MOVB $0xe8, (AX)
MOVB SI, 1(AX)
// Restore SI = len(lit)-1 to choose between the mid (<0x40) and long copy.
ADDL $0x1d, SI
ADDQ $0x02, BX
ADDQ $0x02, AX
CMPL SI, $0x40
JB memmove_midstandalone
JMP memmove_long_standalone
one_byte_standalone:
// One-byte header: (len(lit)-1)<<3.
SHLB $0x03, SI
MOVB SI, (AX)
ADDQ $0x01, BX
ADDQ $0x01, AX
// genMemMoveShort
// margin: 0, min move: 1
// Dispatch on the literal length. JB and JE below both consume the flags of
// the single CMPQ against 3.
CMPQ DX, $0x03
JB emit_lit_memmove_standalone_memmove_move_1or2
JE emit_lit_memmove_standalone_memmove_move_3
CMPQ DX, $0x08
JBE emit_lit_memmove_standalone_memmove_move_4through8
CMPQ DX, $0x10
JBE emit_lit_memmove_standalone_memmove_move_8through16
CMPQ DX, $0x20
JBE emit_lit_memmove_standalone_memmove_move_17through32
JMP emit_lit_memmove_standalone_memmove_move_33through64
emit_lit_memmove_standalone_memmove_move_1or2:
// Copy first and last byte; for a 1-byte literal both refer to the same byte.
MOVB (CX), SI
MOVB -1(CX)(DX*1), CL
MOVB SI, (AX)
MOVB CL, -1(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_3:
MOVW (CX), SI
MOVB 2(CX), CL
MOVW SI, (AX)
MOVB CL, 2(AX)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_4through8:
// Two (possibly overlapping) 4-byte moves: head and tail.
MOVL (CX), SI
MOVL -4(CX)(DX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_8through16:
// Two (possibly overlapping) 8-byte moves: head and tail.
MOVQ (CX), SI
MOVQ -8(CX)(DX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_17through32:
// Two (possibly overlapping) 16-byte unaligned moves: head and tail.
MOVOU (CX), X0
MOVOU -16(CX)(DX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_33through64:
// First 32 and last 32 bytes, overlapping in the middle.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(DX*1), X2
MOVOU -16(CX)(DX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
JMP emit_literal_end_standalone
// Unreachable: follows an unconditional JMP with no label in between.
JMP emit_literal_end_standalone
memmove_midstandalone:
// genMemMoveShort
// margin: 0, min move: 30
// Reached from the two-byte header path when len(lit)-1 < 0x40.
CMPQ DX, $0x20
JBE emit_lit_memmove_mid_standalone_memmove_move_17through32
JMP emit_lit_memmove_mid_standalone_memmove_move_33through64
emit_lit_memmove_mid_standalone_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(DX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_mid_standalone_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(DX*1), X2
MOVOU -16(CX)(DX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
JMP emit_literal_end_standalone
// Unreachable: follows an unconditional JMP with no label in between.
JMP emit_literal_end_standalone
memmove_long_standalone:
// genMemMoveLong
// Preload the first 32 (X0, X1) and last 32 (X2, X3) source bytes; they are
// stored last, with unaligned moves, to cover the edges of the copy.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(DX*1), X2
MOVOU -16(CX)(DX*1), X3
// DI = len(lit)/32, SI = dst & 31, R8 = 64 - (dst & 31).
// dst + R8 - 32 is therefore a multiple of 32, which is what allows the
// aligned MOVOA stores in the loops below.
MOVQ DX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// JA tests ZF from DECQ and CF, which DECQ leaves as set by the SUBQ above.
// NOTE(review): every visible jump to memmove_long_standalone has
// len(lit) >= 65, so DI-1 looks non-zero and this JA appears to be always
// taken, leaving the big_loop_back path unused here - confirm.
DECQ DI
JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_standalonelarge_big_loop_back:
// 32 bytes per iteration through explicit src (SI) / dst (R9) cursors.
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_standalonelarge_big_loop_back
emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
// Copy lit[R8-32:R8] to dst[R8-32:R8], then advance R8 by 32; repeat while
// len(lit) >= R8.
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ DX, R8
JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
// Store the preloaded head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
JMP emit_literal_end_standalone
// Unreachable: follows an unconditional JMP with no label in between.
JMP emit_literal_end_standalone
emit_literal_end_standalone_skip:
// Empty input: return 0.
XORQ BX, BX
emit_literal_end_standalone:
MOVQ BX, ret+48(FP)
RET
// func emitRepeat(dst []byte, length int) int
//
// emitRepeat writes a repeat tag for length into dst and returns the number
// of bytes it accounts for (1 to 4). length is handled as a 32-bit value.
//
// Encodings:
//   length <= 29         1 byte : (length-1)*8 + 4
//   length <  0x11e      2 bytes: 0xec, byte(length-30)
//   length <  0x1001e    3 bytes: 0xf4, uint16(length-30) little-endian
//   otherwise            4 bytes: 0xfc, then length-30 stored with MOVL
//
// Register use: AX = dst cursor, CX = length, BX = encoded value,
// DX = byte count returned.
// No bounds checks are performed on dst.
TEXT ·emitRepeat(SB), NOSPLIT, $0-40
XORQ DX, DX
MOVQ dst_base+0(FP), AX
MOVQ length+24(FP), CX
// emitRepeat
// This BX value is overwritten on every path below before it is read.
LEAL -1(CX), BX
CMPL CX, $0x1d
JBE repeat_one_standalone
// BX = length-30 for all multi-byte forms.
LEAL -30(CX), BX
CMPL CX, $0x0000011e
JB repeat_two_standalone
CMPL CX, $0x0001001e
JB repeat_three_standalone
// Four-byte form. MOVL stores 4 bytes at dst[1:5] but only 3 are counted,
// so dst needs at least 5 writable bytes on this path.
MOVB $0xfc, (AX)
MOVL BX, 1(AX)
ADDQ $0x04, DX
ADDQ $0x04, AX
JMP gen_emit_repeat_end
repeat_three_standalone:
MOVB $0xf4, (AX)
MOVW BX, 1(AX)
ADDQ $0x03, DX
ADDQ $0x03, AX
JMP gen_emit_repeat_end
repeat_two_standalone:
MOVB $0xec, (AX)
MOVB BL, 1(AX)
ADDQ $0x02, DX
ADDQ $0x02, AX
JMP gen_emit_repeat_end
repeat_one_standalone:
// BX = length*8 - 4, which equals (length-1)*8 + 4.
XORL BX, BX
LEAL -4(BX)(CX*8), BX
MOVB BL, (AX)
ADDQ $0x01, DX
ADDQ $0x01, AX
gen_emit_repeat_end:
MOVQ DX, ret+32(FP)
RET
// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes a copy operation for (offset, length) into dst and returns
// the number of bytes it accounts for. offset and length are handled as
// 32-bit values. One of three encodings is chosen by offset:
//   offset <= 0x400      1 to 2 offset bits packed in a 16-bit tag (2 bytes),
//                        optionally followed by a length byte or a repeat
//   offset <= 0x1003f    "emitCopy2": tag byte + uint16(offset-64)
//   otherwise            "emitCopy3": 32-bit tag holding (offset-65536)<<11
// Longer lengths append 1 to 3 extra length bytes (or a repeat, for the
// short-offset form).
//
// Register use: AX = dst cursor, BX = byte count returned, CX = offset /
// tag scratch, DX = length.
// No bounds checks are performed on dst. The longest-length forms store the
// extra length with MOVL (4 bytes) while counting only 3, so they write one
// byte past the returned size.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
XORQ BX, BX
MOVQ dst_base+0(FP), AX
MOVQ offset+24(FP), CX
MOVQ length+32(FP), DX
// emitCopy
CMPL CX, $0x0001003f
JBE two_byte_offset_standalone
// emitCopy3
// DX = length-4; CX = ((offset-65536)<<11) + 7.
LEAL -4(DX), DX
LEAL -65536(CX), CX
SHLL $0x0b, CX
ADDL $0x07, CX
CMPL DX, $0x3c
JBE emit_copy3_0_standalone_emit3
// SI = length-64, emitted as the extra length bytes.
LEAL -60(DX), SI
CMPL DX, $0x0000013c
JB emit_copy3_1_standalone_emit3
CMPL DX, $0x0001003c
JB emit_copy3_2_standalone_emit3
// Length field 63 (0x7e0 = 63<<5): 4-byte tag + 3 counted length bytes.
ADDL $0x000007e0, CX
MOVL CX, (AX)
MOVL SI, 4(AX)
ADDQ $0x07, BX
ADDQ $0x07, AX
JMP gen_emit_copy_end
emit_copy3_2_standalone_emit3:
// Length field 62 (0x7c0 = 62<<5): 4-byte tag + 2 length bytes.
ADDL $0x000007c0, CX
MOVL CX, (AX)
MOVW SI, 4(AX)
ADDQ $0x06, BX
ADDQ $0x06, AX
JMP gen_emit_copy_end
emit_copy3_1_standalone_emit3:
// Length field 61 (0x7a0 = 61<<5): 4-byte tag + 1 length byte.
ADDL $0x000007a0, CX
MOVL CX, (AX)
MOVB SI, 4(AX)
ADDQ $0x05, BX
ADDQ $0x05, AX
JMP gen_emit_copy_end
emit_copy3_0_standalone_emit3:
// length-4 <= 60 fits in the tag itself: OR in (length-4)<<5.
SHLL $0x05, DX
ORL DX, CX
MOVL CX, (AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
JMP gen_emit_copy_end
two_byte_offset_standalone:
// Offsets above 0x400 use the emitCopy2 form.
CMPL CX, $0x00000400
JA two_byte_standalone
CMPL DX, $0x00000013
JAE emit_one_longer_standalone
// length < 19: tag = ((offset-1)<<6) + length*4 - 15,
// i.e. ((offset-1)<<6) | ((length-4)<<2) | 1. Two bytes total.
LEAL -1(CX), CX
SHLL $0x06, CX
LEAL -15(CX)(DX*4), CX
MOVW CX, (AX)
ADDQ $0x02, BX
ADDQ $0x02, AX
JMP gen_emit_copy_end
emit_one_longer_standalone:
CMPL DX, $0x00000112
JAE emit_copy1_repeat_standalone
// 19 <= length < 0x112: low 6 tag bits = 61 (length field 15), followed by
// one extra byte holding length-18.
LEAL -1(CX), CX
SHLL $0x06, CX
LEAL 61(CX), CX
MOVW CX, (AX)
LEAL -18(DX), CX
MOVB CL, 2(AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
JMP gen_emit_copy_end
emit_copy1_repeat_standalone:
// length >= 0x112: emit a 2-byte copy with low 6 tag bits = 57 (length
// field 14, i.e. 18 bytes), then a repeat for the remaining length-18.
LEAL -1(CX), CX
SHLL $0x06, CX
LEAL 57(CX), CX
MOVW CX, (AX)
ADDQ $0x02, BX
ADDQ $0x02, AX
SUBL $0x12, DX
// emitRepeat
// Same tiers as the standalone emitRepeat, applied to DX.
// The CX value computed next is overwritten on every path before use.
LEAL -1(DX), CX
CMPL DX, $0x1d
JBE repeat_one_emit_copy1_do_repeat_standalone
LEAL -30(DX), CX
CMPL DX, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_standalone
CMPL DX, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_standalone
// MOVL stores 4 bytes at dst[1:5]; only 3 are counted.
MOVB $0xfc, (AX)
MOVL CX, 1(AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
JMP gen_emit_copy_end
repeat_three_emit_copy1_do_repeat_standalone:
MOVB $0xf4, (AX)
MOVW CX, 1(AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
JMP gen_emit_copy_end
repeat_two_emit_copy1_do_repeat_standalone:
MOVB $0xec, (AX)
MOVB CL, 1(AX)
ADDQ $0x02, BX
ADDQ $0x02, AX
JMP gen_emit_copy_end
repeat_one_emit_copy1_do_repeat_standalone:
// CX = DX*8 - 4, which equals (DX-1)*8 + 4.
XORL CX, CX
LEAL -4(CX)(DX*8), CX
MOVB CL, (AX)
ADDQ $0x01, BX
ADDQ $0x01, AX
JMP gen_emit_copy_end
two_byte_standalone:
// emitCopy2
// CX = offset-64 is stored as 16 bits at dst[1:3]; DX = length-4.
LEAL -64(CX), CX
LEAL -4(DX), DX
MOVW CX, 1(AX)
CMPL DX, $0x3c
JBE emit_copy2_0_standalone_emit2
// CX = length-64, emitted as the extra length bytes after the offset.
LEAL -60(DX), CX
CMPL DX, $0x0000013c
JB emit_copy2_1_standalone_emit2
CMPL DX, $0x0001003c
JB emit_copy2_2_standalone_emit2
// Tag 0xfe (= 63*4 + 2): 3 counted length bytes; MOVL stores 4.
MOVB $0xfe, (AX)
MOVL CX, 3(AX)
ADDQ $0x06, BX
ADDQ $0x06, AX
JMP gen_emit_copy_end
emit_copy2_2_standalone_emit2:
// Tag 0xfa (= 62*4 + 2): 2 extra length bytes.
MOVB $0xfa, (AX)
MOVW CX, 3(AX)
ADDQ $0x05, BX
ADDQ $0x05, AX
JMP gen_emit_copy_end
emit_copy2_1_standalone_emit2:
// Tag 0xf6 (= 61*4 + 2): 1 extra length byte.
MOVB $0xf6, (AX)
MOVB CL, 3(AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
JMP gen_emit_copy_end
emit_copy2_0_standalone_emit2:
// length-4 <= 60 fits in the tag: tag = 2 + (length-4)*4.
MOVL $0x00000002, CX
LEAL (CX)(DX*4), CX
MOVB CL, (AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
gen_emit_copy_end:
MOVQ BX, ret+40(FP)
RET
// func emitCopyLits2(dst []byte, lits []byte, offset int, length int) int
// Requires: CMOV
//
// emitCopyLits2 writes a 3-byte copy tag that also carries len(lits), copies
// the literal bytes right after it, and, when the length does not fit in the
// tag, appends a repeat for the remainder. Returns the total bytes accounted
// for (3 + len(lits) + repeat size).
//
// Tag layout as computed below:
//   dst[0]   = 3 + ((len(lits)-1) << 3) + (min(length-4, 7) << 5)
//   dst[1:3] = uint16(offset-64)
// When length-4 >= 7 the remainder length-11 is emitted as a repeat (skipped
// if it is zero).
//
// Register use: AX = dst cursor, BX = byte count returned, SI = len(lits),
// DI = repeat remainder, CX/DX/R8 = scratch.
// No bounds checks are performed on dst or lits.
// NOTE(review): the literal copy assumes len(lits) >= 1; with len(lits) == 0
// the 1-or-2 path would read lits[-1] and overwrite dst[2]. The tag only has
// room for len(lits)-1 in two bits, so callers presumably pass 1..4 - confirm.
TEXT ·emitCopyLits2(SB), NOSPLIT, $0-72
XORQ BX, BX
MOVQ dst_base+0(FP), AX
MOVQ lits_len+32(FP), SI
MOVQ offset+48(FP), CX
MOVQ length+56(FP), DX
// The flags from this compare are overwritten by the XORQ below before any
// conditional instruction reads them.
CMPL DX, $0x0b
// emitCopy2WithLits
XORQ DI, DI
SUBL $0x40, CX
// R8 = length-11 (candidate repeat remainder), DX = length-4.
LEAL -11(DX), R8
LEAL -4(DX), DX
MOVW CX, 1(AX)
// Both CMOVs use the flags of this CMPL (signed compare); the MOVQ in
// between does not modify flags.
//   length-4 >= 7: DI = length-11, CX stays 7
//   length-4 <  7: DI stays 0,     CX = length-4
CMPL DX, $0x07
CMOVLGE R8, DI
MOVQ $0x00000007, CX
CMOVLLT DX, CX
// CX = 3 + 8*((len(lits)-1) + 4*CX): the tag byte.
LEAL -1(SI)(CX*4), CX
MOVL $0x00000003, DX
LEAL (DX)(CX*8), CX
MOVB CL, (AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
MOVQ lits_base+24(FP), CX
// genMemMoveVeryShort
// JE and JA both consume the flags of the single CMPQ against 3.
CMPQ SI, $0x03
JE standalone_emitcopy2_lits_move_3
JA standalone_emitcopy2_lits_move_4
// 1 or 2 literal bytes: copy first and last byte.
MOVB (CX), DL
MOVB -1(CX)(SI*1), CL
MOVB DL, (AX)
MOVB CL, -1(AX)(SI*1)
JMP standalone_emitcopy2_lits_end
standalone_emitcopy2_lits_move_3:
MOVW (CX), DX
MOVB 2(CX), CL
MOVW DX, (AX)
MOVB CL, 2(AX)
JMP standalone_emitcopy2_lits_end
standalone_emitcopy2_lits_move_4:
// More than 3 literal bytes: exactly 4 bytes are copied.
MOVL (CX), DX
MOVL DX, (AX)
standalone_emitcopy2_lits_end:
ADDQ SI, BX
ADDQ SI, AX
// No remainder: done.
TESTL DI, DI
JZ standalone_emitcopy2_lits_done
// emitRepeat
// Same tiers as the standalone emitRepeat, applied to DI.
// The CX value computed next is overwritten on every path before use.
LEAL -1(DI), CX
CMPL DI, $0x1d
JBE repeat_one_standalone_emitcopy2_lits
LEAL -30(DI), CX
CMPL DI, $0x0000011e
JB repeat_two_standalone_emitcopy2_lits
CMPL DI, $0x0001001e
JB repeat_three_standalone_emitcopy2_lits
// MOVL stores 4 bytes at dst[1:5]; only 3 are counted.
MOVB $0xfc, (AX)
MOVL CX, 1(AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
JMP standalone_emitcopy2_lits_done
repeat_three_standalone_emitcopy2_lits:
MOVB $0xf4, (AX)
MOVW CX, 1(AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
JMP standalone_emitcopy2_lits_done
repeat_two_standalone_emitcopy2_lits:
MOVB $0xec, (AX)
MOVB CL, 1(AX)
ADDQ $0x02, BX
ADDQ $0x02, AX
JMP standalone_emitcopy2_lits_done
repeat_one_standalone_emitcopy2_lits:
// CX = DI*8 - 4, which equals (DI-1)*8 + 4.
XORL CX, CX
LEAL -4(CX)(DI*8), CX
MOVB CL, (AX)
ADDQ $0x01, BX
ADDQ $0x01, AX
standalone_emitcopy2_lits_done:
MOVQ BX, ret+64(FP)
RET
// func emitCopyLits3(dst []byte, lits []byte, offset int, length int) int
//
// emitCopyLits3 writes a 32-bit copy tag that also carries len(lits),
// followed by 0 to 3 extra length bytes, then copies the literal bytes after
// it. Returns tag size + len(lits).
//
// Tag value as computed below:
//   ((offset-65536) << 11) + 7 + (len(lits) << 3) + (lengthField << 5)
// where lengthField is length-4 when that is <= 60, otherwise 61, 62 or 63
// to signal 1, 2 or 3 extra bytes holding length-64.
//
// Register use: AX = dst cursor, BX = byte count returned, SI = len(lits),
// CX = tag / lits pointer, DX = length-4, DI = extra length value.
// No bounds checks are performed on dst or lits.
// NOTE(review): the literal copy only distinguishes len(lits) == 3 from
// everything else. With len(lits) == 0 the 1-or-2 path would read lits[-1]
// and overwrite the last header byte, and values above 3 would spill into
// the length field of the tag; callers presumably pass 1..3 - confirm.
TEXT ·emitCopyLits3(SB), NOSPLIT, $0-72
XORQ BX, BX
MOVQ dst_base+0(FP), AX
MOVQ lits_len+32(FP), SI
MOVQ offset+48(FP), CX
MOVQ length+56(FP), DX
// emitCopy3
LEAL -4(DX), DX
LEAL -65536(CX), CX
SHLL $0x0b, CX
LEAL 7(CX)(SI*8), CX
CMPL DX, $0x3c
JBE emit_copy3_0_standalone_lits
// DI = length-64, emitted as the extra length bytes.
LEAL -60(DX), DI
CMPL DX, $0x0000013c
JB emit_copy3_1_standalone_lits
CMPL DX, $0x0001003c
JB emit_copy3_2_standalone_lits
// Length field 63 (0x7e0 = 63<<5): 3 counted length bytes; MOVL stores 4,
// the last of which is overwritten by the literal copy when len(lits) >= 1.
ADDL $0x000007e0, CX
MOVL CX, (AX)
MOVL DI, 4(AX)
ADDQ $0x07, BX
ADDQ $0x07, AX
JMP gen_emit_copy_lits_copylits
emit_copy3_2_standalone_lits:
// Length field 62 (0x7c0 = 62<<5): 2 extra length bytes.
ADDL $0x000007c0, CX
MOVL CX, (AX)
MOVW DI, 4(AX)
ADDQ $0x06, BX
ADDQ $0x06, AX
JMP gen_emit_copy_lits_copylits
emit_copy3_1_standalone_lits:
// Length field 61 (0x7a0 = 61<<5): 1 extra length byte.
ADDL $0x000007a0, CX
MOVL CX, (AX)
MOVB DI, 4(AX)
ADDQ $0x05, BX
ADDQ $0x05, AX
JMP gen_emit_copy_lits_copylits
emit_copy3_0_standalone_lits:
// length-4 <= 60 fits in the tag itself: OR in (length-4)<<5.
SHLL $0x05, DX
ORL DX, CX
MOVL CX, (AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
gen_emit_copy_lits_copylits:
MOVQ lits_base+24(FP), CX
// genMemMoveVeryShort
CMPQ SI, $0x03
JE standalone_emitcopy3_lits_move_3
// 1 or 2 literal bytes: copy first and last byte.
MOVB (CX), DL
MOVB -1(CX)(SI*1), CL
MOVB DL, (AX)
MOVB CL, -1(AX)(SI*1)
JMP standalone_emitcopy3_lits_end
standalone_emitcopy3_lits_move_3:
MOVW (CX), DX
MOVB 2(CX), CL
MOVW DX, (AX)
MOVB CL, 2(AX)
standalone_emitcopy3_lits_end:
// Only the count is advanced; AX is not used again.
ADDQ SI, BX
MOVQ BX, ret+64(FP)
RET
// func matchLen(a []byte, b []byte) int
// Requires: BMI
//
// Returns how many leading bytes of a and b are identical. Only len(a) is
// read; b is indexed with the same offsets, so b presumably has to be at
// least as long as a (TODO confirm with callers). The remaining-length
// counter is handled as a 32-bit value.
//
// Register roles: AX = a, CX = b, DX = bytes of a not yet compared,
// SI = bytes matched so far (the result).
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_len+8(FP), DX
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
XORL SI, SI
// Wide loop: compare 16 bytes per round while at least 16 remain.
ml_sa_wide_check:
CMPL DX, $0x10
JB ml_sa_tail8
MOVQ (AX)(SI*1), BX
MOVQ 8(AX)(SI*1), DI
XORQ (CX)(SI*1), BX
JNZ ml_sa_diff_lo
XORQ 8(CX)(SI*1), DI
JNZ ml_sa_diff_hi
SUBL $0x10, DX
ADDL $0x10, SI
JMP ml_sa_wide_check
// Difference in the upper 8 bytes of a 16-byte round: the index of the
// lowest set bit of the XOR, divided by 8, is the count of equal bytes.
ml_sa_diff_hi:
#ifdef GOAMD64_v3
TZCNTQ DI, DI
#else
BSFQ DI, DI
#endif
SARQ $0x03, DI
LEAL 8(SI)(DI*1), SI
JMP ml_sa_done
// Fewer than 16 bytes left: try one 8-byte compare.
ml_sa_tail8:
CMPL DX, $0x08
JB ml_sa_tail4
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
JNZ ml_sa_diff_lo
SUBL $0x08, DX
ADDL $0x08, SI
JMP ml_sa_tail4
// Difference inside the 8 bytes starting at SI.
ml_sa_diff_lo:
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP ml_sa_done
// Then 4 bytes; on a mismatch fall through to the finer-grained checks.
ml_sa_tail4:
CMPL DX, $0x04
JB ml_sa_tail2
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE ml_sa_tail2
SUBL $0x04, DX
ADDL $0x04, SI
// Then 2 bytes (exactly one byte left goes straight to the byte compare,
// nothing left finishes).
ml_sa_tail2:
CMPL DX, $0x01
JE ml_sa_tail1
JB ml_sa_done
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE ml_sa_tail1
LEAL 2(SI), SI
SUBL $0x02, DX
JZ ml_sa_done
// Finally a single byte.
ml_sa_tail1:
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE ml_sa_done
INCL SI
ml_sa_done:
MOVQ SI, ret+48(FP)
RET
// func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// Requires: CMOV, SSE2
//
// Re-encodes the LZ4 block in src into dst using the literal, copy and repeat
// tags emitted below.
//
// Results:
//   uncompressed >= 0  total literal + match bytes described by src;
//                      dstUsed is the number of bytes written to dst.
//   uncompressed = -1  src is malformed (lz4_mz_corrupt); dstUsed = 0.
//   uncompressed = -2  dst is too small (lz4_mz_dstfull); dstUsed = 0.
//
// Register roles in the main loop:
//   AX   dst write cursor         CX   dst limit (dst_base + dst_len - 12)
//   DX   src read cursor          BX   end of src
//   SI   uncompressed bytes so far
//   R8   literal length, later the match offset
//   R9   match length
//   R10  literal count held back to be fused into the next copy (0 = none)
//   (SP) 8-byte slot holding the offset of the previous match
TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $8-64
XORQ SI, SI
MOVQ dst_base+0(FP), AX
MOVQ dst_len+8(FP), CX
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
LEAQ -12(AX)(CX*1), CX
// Previous offset starts at 1. The slot is compared and rewritten as a
// 64-bit value, so all 8 bytes are initialised here.
MOVQ $0x00000001, (SP)
lz4_mz_loop:
CMPQ DX, BX
JAE lz4_mz_corrupt
CMPQ AX, CX
JAE lz4_mz_dstfull
// Token: high nibble = literal length (R8), low nibble = match length (R9).
MOVBQZX (DX), DI
MOVQ DI, R8
MOVQ DI, R9
ANDQ $0x0f, R9
XORQ R10, R10
SHRQ $0x04, R8
// Tokens below 0x50 carry at most 4 literals: keep the count in R10 so the
// literals can be fused with the copy instead of emitted on their own.
CMPQ DI, $0x50
CMOVQLT R8, R10
JLT lz4_mz_ll_end
CMPQ DI, $0xf0
JB lz4_mz_ll_end
// Literal length nibble is 15: add extension bytes while they are 0xff.
lz4_mz_ll_loop:
INCQ DX
CMPQ DX, BX
JAE lz4_mz_corrupt
MOVBQZX (DX), DI
ADDQ DI, R8
CMPQ DI, $0xff
JEQ lz4_mz_ll_loop
lz4_mz_ll_end:
LEAQ (DX)(R8*1), DI
ADDQ $0x04, R9
CMPQ DI, BX
JAE lz4_mz_corrupt
// DX -> first literal byte, DI -> first byte after the literals.
INCQ DX
INCQ DI
TESTQ R8, R8
JZ lz4_mz_lits_done
TESTQ R10, R10
JNZ lz4_mz_lits_done
LEAQ (AX)(R8*1), R11
CMPQ R11, CX
JAE lz4_mz_dstfull
// emitLiteral
LEAL -1(R8), R11
CMPL R11, $0x1d
JB one_byte_lz4_mz
SUBL $0x1d, R11
CMPL R11, $0x00000100
JB two_bytes_lz4_mz
CMPL R11, $0x00010000
JB three_bytes_lz4_mz
MOVL R11, R12
SHRL $0x10, R12
MOVB $0xf8, (AX)
MOVW R11, 1(AX)
MOVB R12, 3(AX)
ADDQ $0x04, AX
ADDL $0x1d, R11
JMP memmove_long_lz4_mz
three_bytes_lz4_mz:
MOVB $0xf0, (AX)
MOVW R11, 1(AX)
ADDQ $0x03, AX
ADDL $0x1d, R11
JMP memmove_long_lz4_mz
two_bytes_lz4_mz:
MOVB $0xe8, (AX)
MOVB R11, 1(AX)
ADDL $0x1d, R11
ADDQ $0x02, AX
CMPL R11, $0x40
JB memmove_midlz4_mz
JMP memmove_long_lz4_mz
one_byte_lz4_mz:
SHLB $0x03, R11
MOVB R11, (AX)
ADDQ $0x01, AX
LEAQ (AX)(R8*1), R11
MOVL R8, R12
// genMemMoveShort
// margin: 0, min move: 1
CMPQ R12, $0x03
JB emit_lit_memmove_lz4_mz_memmove_move_1or2
JE emit_lit_memmove_lz4_mz_memmove_move_3
CMPQ R12, $0x08
JBE emit_lit_memmove_lz4_mz_memmove_move_4through8
CMPQ R12, $0x10
JBE emit_lit_memmove_lz4_mz_memmove_move_8through16
CMPQ R12, $0x20
JBE emit_lit_memmove_lz4_mz_memmove_move_17through32
JMP emit_lit_memmove_lz4_mz_memmove_move_33through64
emit_lit_memmove_lz4_mz_memmove_move_1or2:
MOVB (DX), R13
MOVB -1(DX)(R12*1), R14
MOVB R13, (AX)
MOVB R14, -1(AX)(R12*1)
JMP memmove_end_copy_lz4_mz
emit_lit_memmove_lz4_mz_memmove_move_3:
MOVW (DX), R13
MOVB 2(DX), R14
MOVW R13, (AX)
MOVB R14, 2(AX)
JMP memmove_end_copy_lz4_mz
emit_lit_memmove_lz4_mz_memmove_move_4through8:
MOVL (DX), R13
MOVL -4(DX)(R12*1), R14
MOVL R13, (AX)
MOVL R14, -4(AX)(R12*1)
JMP memmove_end_copy_lz4_mz
emit_lit_memmove_lz4_mz_memmove_move_8through16:
MOVQ (DX), R13
MOVQ -8(DX)(R12*1), R14
MOVQ R13, (AX)
MOVQ R14, -8(AX)(R12*1)
JMP memmove_end_copy_lz4_mz
emit_lit_memmove_lz4_mz_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(R12*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R12*1)
JMP memmove_end_copy_lz4_mz
emit_lit_memmove_lz4_mz_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R12*1), X2
MOVOU -16(DX)(R12*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R12*1)
MOVOU X3, -16(AX)(R12*1)
memmove_end_copy_lz4_mz:
MOVQ R11, AX
JMP lz4_mz_lits_emit_done
memmove_midlz4_mz:
LEAQ (AX)(R8*1), R11
MOVL R8, R12
// genMemMoveShort
// margin: 0, min move: 30
CMPQ R12, $0x20
JBE emit_lit_memmove_mid_lz4_mz_memmove_move_17through32
JMP emit_lit_memmove_mid_lz4_mz_memmove_move_33through64
emit_lit_memmove_mid_lz4_mz_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(R12*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R12*1)
JMP memmove_mid_end_copy_lz4_mz
emit_lit_memmove_mid_lz4_mz_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R12*1), X2
MOVOU -16(DX)(R12*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R12*1)
MOVOU X3, -16(AX)(R12*1)
memmove_mid_end_copy_lz4_mz:
MOVQ R11, AX
JMP lz4_mz_lits_emit_done
memmove_long_lz4_mz:
LEAQ (AX)(R8*1), R11
MOVL R8, R12
// genMemMoveLong
// The first and last 32 bytes are loaded up front and stored last with
// unaligned stores; the middle is copied in 32-byte steps whose MOVOA stores
// start at the next 32-byte boundary of dst.
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R12*1), X2
MOVOU -16(DX)(R12*1), X3
MOVQ R12, R14
SHRQ $0x05, R14
MOVQ AX, R13
ANDL $0x0000001f, R13
MOVQ $0x00000040, R15
SUBQ R13, R15
DECQ R14
JA emit_lit_memmove_long_lz4_mzlarge_forward_sse_loop_32
LEAQ -32(DX)(R15*1), R13
LEAQ -32(AX)(R15*1), BP
emit_lit_memmove_long_lz4_mzlarge_big_loop_back:
MOVOU (R13), X4
MOVOU 16(R13), X5
MOVOA X4, (BP)
MOVOA X5, 16(BP)
ADDQ $0x20, BP
ADDQ $0x20, R13
ADDQ $0x20, R15
DECQ R14
JNA emit_lit_memmove_long_lz4_mzlarge_big_loop_back
emit_lit_memmove_long_lz4_mzlarge_forward_sse_loop_32:
MOVOU -32(DX)(R15*1), X4
MOVOU -16(DX)(R15*1), X5
MOVOA X4, -32(AX)(R15*1)
MOVOA X5, -16(AX)(R15*1)
ADDQ $0x20, R15
CMPQ R12, R15
JAE emit_lit_memmove_long_lz4_mzlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R12*1)
MOVOU X3, -16(AX)(R12*1)
MOVQ R11, AX
lz4_mz_lits_emit_done:
lz4_mz_lits_done:
ADDQ R8, SI
// Swap cursors: DI -> start of the literals, DX -> first byte after them.
MOVQ DI, R8
MOVQ DX, DI
MOVQ R8, DX
CMPQ DX, BX
JNE lz4_mz_match
// End of input: the last sequence must not carry a match length, and any
// held-back literals still have to be written.
CMPQ R9, $0x04
JNE lz4_mz_corrupt
TESTQ R10, R10
JNZ lz4_mz_emit_final
JMP lz4_mz_done
lz4_mz_match:
// Read the 16-bit offset; it must be non-zero and within the bytes
// produced so far.
ADDQ $0x02, DX
CMPQ DX, BX
JAE lz4_mz_corrupt
MOVWQZX -2(DX), R8
TESTQ R8, R8
JZ lz4_mz_corrupt
CMPQ R8, SI
JA lz4_mz_corrupt
// Match length nibble 15 (R9 == 19 after the +4): add extension bytes.
CMPQ R9, $0x13
JNE lz4_mz_ml_done
lz4_mz_ml_loop:
MOVBQZX (DX), R11
INCQ DX
ADDQ R11, R9
CMPQ DX, BX
JAE lz4_mz_corrupt
CMPQ R11, $0xff
JEQ lz4_mz_ml_loop
lz4_mz_ml_done:
ADDQ R9, SI
TESTQ R10, R10
JNZ lz4_mz_dofuse
// Same offset as the previous match: emit a repeat instead of a copy.
CMPQ (SP), R8
JNE lz4_mz_docopy
// emitRepeat
LEAL -1(R9), DI
CMPL R9, $0x1d
JBE repeat_one_lz4_mz
LEAL -30(R9), DI
CMPL R9, $0x0000011e
JB repeat_two_lz4_mz
CMPL R9, $0x0001001e
JB repeat_three_lz4_mz
MOVB $0xfc, (AX)
MOVL DI, 1(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
repeat_three_lz4_mz:
MOVB $0xf4, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
repeat_two_lz4_mz:
MOVB $0xec, (AX)
MOVB DI, 1(AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
repeat_one_lz4_mz:
XORL DI, DI
LEAL -4(DI)(R9*8), DI
MOVB DI, (AX)
ADDQ $0x01, AX
JMP lz4_mz_loop
lz4_mz_dofuse:
MOVQ R8, (SP)
// Offsets below 64 cannot use the fused form: write the held-back literals
// separately and follow them with a plain copy.
CMPQ R8, $0x40
JB lz4_mz_doemitcopy
// emitCopy2WithLits
XORQ R11, R11
SUBL $0x40, R8
LEAL -11(R9), R12
LEAL -4(R9), R9
MOVW R8, 1(AX)
CMPL R9, $0x07
CMOVLGE R12, R11
MOVQ $0x00000007, R8
CMOVLLT R9, R8
LEAL -1(R10)(R8*4), R8
MOVL $0x00000003, R9
LEAL (R9)(R8*8), R8
MOVB R8, (AX)
ADDQ $0x03, AX
// Always move 4 literal bytes, but advance dst only by the real count (R10).
MOVL (DI), DI
MOVL DI, (AX)
ADDQ R10, AX
TESTL R11, R11
JZ lz4_mz_loop
// emitRepeat
LEAL -1(R11), DI
CMPL R11, $0x1d
JBE repeat_one_fused_emitrep_lz4_mz_
LEAL -30(R11), DI
CMPL R11, $0x0000011e
JB repeat_two_fused_emitrep_lz4_mz_
CMPL R11, $0x0001001e
JB repeat_three_fused_emitrep_lz4_mz_
MOVB $0xfc, (AX)
MOVL DI, 1(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
repeat_three_fused_emitrep_lz4_mz_:
MOVB $0xf4, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
repeat_two_fused_emitrep_lz4_mz_:
MOVB $0xec, (AX)
MOVB DI, 1(AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
repeat_one_fused_emitrep_lz4_mz_:
XORL DI, DI
LEAL -4(DI)(R11*8), DI
MOVB DI, (AX)
ADDQ $0x01, AX
JMP lz4_mz_loop
lz4_mz_doemitcopy:
// emitLiteral
LEAL -1(R10), R11
CMPL R11, $0x1d
JB one_byte_lz4_mz_emitcopy
SUBL $0x1d, R11
CMPL R11, $0x00000100
JB two_bytes_lz4_mz_emitcopy
CMPL R11, $0x00010000
JB three_bytes_lz4_mz_emitcopy
MOVL R11, R12
SHRL $0x10, R12
MOVB $0xf8, (AX)
MOVW R11, 1(AX)
MOVB R12, 3(AX)
ADDQ $0x04, AX
ADDL $0x1d, R11
JMP memmove_long_lz4_mz_emitcopy
three_bytes_lz4_mz_emitcopy:
MOVB $0xf0, (AX)
MOVW R11, 1(AX)
ADDQ $0x03, AX
ADDL $0x1d, R11
JMP memmove_long_lz4_mz_emitcopy
two_bytes_lz4_mz_emitcopy:
MOVB $0xe8, (AX)
MOVB R11, 1(AX)
ADDL $0x1d, R11
ADDQ $0x02, AX
CMPL R11, $0x40
JB memmove_midlz4_mz_emitcopy
JMP memmove_long_lz4_mz_emitcopy
one_byte_lz4_mz_emitcopy:
SHLB $0x03, R11
MOVB R11, (AX)
ADDQ $0x01, AX
LEAQ (AX)(R10*1), R11
// genMemMoveShort
// margin: 0, min move: 1
CMPQ R10, $0x03
JB emit_lit_memmove_lz4_mz_emitcopy_memmove_move_1or2
JE emit_lit_memmove_lz4_mz_emitcopy_memmove_move_3
CMPQ R10, $0x08
JBE emit_lit_memmove_lz4_mz_emitcopy_memmove_move_4through8
CMPQ R10, $0x10
JBE emit_lit_memmove_lz4_mz_emitcopy_memmove_move_8through16
CMPQ R10, $0x20
JBE emit_lit_memmove_lz4_mz_emitcopy_memmove_move_17through32
JMP emit_lit_memmove_lz4_mz_emitcopy_memmove_move_33through64
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_1or2:
MOVB (DI), R12
MOVB -1(DI)(R10*1), DI
MOVB R12, (AX)
MOVB DI, -1(AX)(R10*1)
JMP memmove_end_copy_lz4_mz_emitcopy
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_3:
MOVW (DI), R12
MOVB 2(DI), DI
MOVW R12, (AX)
MOVB DI, 2(AX)
JMP memmove_end_copy_lz4_mz_emitcopy
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_4through8:
MOVL (DI), R12
MOVL -4(DI)(R10*1), DI
MOVL R12, (AX)
MOVL DI, -4(AX)(R10*1)
JMP memmove_end_copy_lz4_mz_emitcopy
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_8through16:
MOVQ (DI), R12
MOVQ -8(DI)(R10*1), DI
MOVQ R12, (AX)
MOVQ DI, -8(AX)(R10*1)
JMP memmove_end_copy_lz4_mz_emitcopy
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R10*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R10*1)
JMP memmove_end_copy_lz4_mz_emitcopy
emit_lit_memmove_lz4_mz_emitcopy_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R10*1), X2
MOVOU -16(DI)(R10*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R10*1)
MOVOU X3, -16(AX)(R10*1)
memmove_end_copy_lz4_mz_emitcopy:
MOVQ R11, AX
JMP lz4_mz__emit_done
memmove_midlz4_mz_emitcopy:
LEAQ (AX)(R10*1), R11
// genMemMoveShort
// margin: 0, min move: 30
CMPQ R10, $0x20
JBE emit_lit_memmove_mid_lz4_mz_emitcopy_memmove_move_17through32
JMP emit_lit_memmove_mid_lz4_mz_emitcopy_memmove_move_33through64
emit_lit_memmove_mid_lz4_mz_emitcopy_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R10*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R10*1)
JMP memmove_mid_end_copy_lz4_mz_emitcopy
emit_lit_memmove_mid_lz4_mz_emitcopy_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R10*1), X2
MOVOU -16(DI)(R10*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R10*1)
MOVOU X3, -16(AX)(R10*1)
memmove_mid_end_copy_lz4_mz_emitcopy:
MOVQ R11, AX
JMP lz4_mz__emit_done
memmove_long_lz4_mz_emitcopy:
LEAQ (AX)(R10*1), R11
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R10*1), X2
MOVOU -16(DI)(R10*1), X3
MOVQ R10, R13
SHRQ $0x05, R13
MOVQ AX, R12
ANDL $0x0000001f, R12
MOVQ $0x00000040, R14
SUBQ R12, R14
DECQ R13
JA emit_lit_memmove_long_lz4_mz_emitcopylarge_forward_sse_loop_32
LEAQ -32(DI)(R14*1), R12
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_lz4_mz_emitcopylarge_big_loop_back:
MOVOU (R12), X4
MOVOU 16(R12), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R12
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_lz4_mz_emitcopylarge_big_loop_back
emit_lit_memmove_long_lz4_mz_emitcopylarge_forward_sse_loop_32:
MOVOU -32(DI)(R14*1), X4
MOVOU -16(DI)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R10, R14
JAE emit_lit_memmove_long_lz4_mz_emitcopylarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R10*1)
MOVOU X3, -16(AX)(R10*1)
MOVQ R11, AX
lz4_mz__emit_done:
// emitCopy
// Offsets up to 1024 use the 2-byte copy1 form, larger ones copy2.
CMPL R8, $0x00000400
JA two_byte_lz4_mz__lz4_mz_short_
CMPL R9, $0x00000013
JAE emit_one_longer_lz4_mz__lz4_mz_short_
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL -15(DI)(R9*4), DI
MOVW DI, (AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
emit_one_longer_lz4_mz__lz4_mz_short_:
CMPL R9, $0x00000112
JAE emit_copy1_repeat_lz4_mz__lz4_mz_short_
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL 61(DI), DI
MOVW DI, (AX)
LEAL -18(R9), DI
MOVB DI, 2(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
emit_copy1_repeat_lz4_mz__lz4_mz_short_:
// Too long for copy1 plus one length byte: emit an 18-byte copy1 and a
// repeat for the rest.
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL 57(DI), DI
MOVW DI, (AX)
ADDQ $0x02, AX
SUBL $0x12, R9
// emitRepeat
LEAL -1(R9), DI
CMPL R9, $0x1d
JBE repeat_one_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_
LEAL -30(R9), DI
CMPL R9, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_
CMPL R9, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_
MOVB $0xfc, (AX)
MOVL DI, 1(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
repeat_three_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_:
MOVB $0xf4, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
repeat_two_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_:
MOVB $0xec, (AX)
MOVB DI, 1(AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
repeat_one_emit_copy1_do_repeat_lz4_mz__lz4_mz_short_:
XORL DI, DI
LEAL -4(DI)(R9*8), DI
MOVB DI, (AX)
ADDQ $0x01, AX
JMP lz4_mz_loop
two_byte_lz4_mz__lz4_mz_short_:
// emitCopy2
LEAL -64(R8), R8
LEAL -4(R9), R9
MOVW R8, 1(AX)
CMPL R9, $0x3c
JBE emit_copy2_0_lz4_mz__lz4_mz_short__emit2
LEAL -60(R9), DI
CMPL R9, $0x0000013c
JB emit_copy2_1_lz4_mz__lz4_mz_short__emit2
CMPL R9, $0x0001003c
JB emit_copy2_2_lz4_mz__lz4_mz_short__emit2
MOVB $0xfe, (AX)
MOVL DI, 3(AX)
ADDQ $0x06, AX
JMP lz4_mz_loop
emit_copy2_2_lz4_mz__lz4_mz_short__emit2:
MOVB $0xfa, (AX)
MOVW DI, 3(AX)
ADDQ $0x05, AX
JMP lz4_mz_loop
emit_copy2_1_lz4_mz__lz4_mz_short__emit2:
MOVB $0xf6, (AX)
MOVB DI, 3(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
emit_copy2_0_lz4_mz__lz4_mz_short__emit2:
MOVL $0x00000002, DI
LEAL (DI)(R9*4), DI
MOVB DI, (AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
lz4_mz_docopy:
// New offset with no literals held back: remember it and emit a plain copy.
MOVQ R8, (SP)
// emitCopy
CMPL R8, $0x00000400
JA two_byte_lz4_mz__lz4_mz
CMPL R9, $0x00000013
JAE emit_one_longer_lz4_mz__lz4_mz
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL -15(DI)(R9*4), DI
MOVW DI, (AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
emit_one_longer_lz4_mz__lz4_mz:
CMPL R9, $0x00000112
JAE emit_copy1_repeat_lz4_mz__lz4_mz
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL 61(DI), DI
MOVW DI, (AX)
LEAL -18(R9), DI
MOVB DI, 2(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
emit_copy1_repeat_lz4_mz__lz4_mz:
LEAL -1(R8), DI
SHLL $0x06, DI
LEAL 57(DI), DI
MOVW DI, (AX)
ADDQ $0x02, AX
SUBL $0x12, R9
// emitRepeat
LEAL -1(R9), DI
CMPL R9, $0x1d
JBE repeat_one_emit_copy1_do_repeat_lz4_mz__lz4_mz
LEAL -30(R9), DI
CMPL R9, $0x0000011e
JB repeat_two_emit_copy1_do_repeat_lz4_mz__lz4_mz
CMPL R9, $0x0001001e
JB repeat_three_emit_copy1_do_repeat_lz4_mz__lz4_mz
MOVB $0xfc, (AX)
MOVL DI, 1(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
repeat_three_emit_copy1_do_repeat_lz4_mz__lz4_mz:
MOVB $0xf4, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
repeat_two_emit_copy1_do_repeat_lz4_mz__lz4_mz:
MOVB $0xec, (AX)
MOVB DI, 1(AX)
ADDQ $0x02, AX
JMP lz4_mz_loop
repeat_one_emit_copy1_do_repeat_lz4_mz__lz4_mz:
XORL DI, DI
LEAL -4(DI)(R9*8), DI
MOVB DI, (AX)
ADDQ $0x01, AX
JMP lz4_mz_loop
two_byte_lz4_mz__lz4_mz:
// emitCopy2
LEAL -64(R8), R8
LEAL -4(R9), R9
MOVW R8, 1(AX)
CMPL R9, $0x3c
JBE emit_copy2_0_lz4_mz__lz4_mz_emit2
LEAL -60(R9), DI
CMPL R9, $0x0000013c
JB emit_copy2_1_lz4_mz__lz4_mz_emit2
CMPL R9, $0x0001003c
JB emit_copy2_2_lz4_mz__lz4_mz_emit2
MOVB $0xfe, (AX)
MOVL DI, 3(AX)
ADDQ $0x06, AX
JMP lz4_mz_loop
emit_copy2_2_lz4_mz__lz4_mz_emit2:
MOVB $0xfa, (AX)
MOVW DI, 3(AX)
ADDQ $0x05, AX
JMP lz4_mz_loop
emit_copy2_1_lz4_mz__lz4_mz_emit2:
MOVB $0xf6, (AX)
MOVB DI, 3(AX)
ADDQ $0x04, AX
JMP lz4_mz_loop
emit_copy2_0_lz4_mz__lz4_mz_emit2:
MOVL $0x00000002, DI
LEAL (DI)(R9*4), DI
MOVB DI, (AX)
ADDQ $0x03, AX
JMP lz4_mz_loop
lz4_mz_emit_final:
// Input ended while literals were still held back for fusing: write them
// as a plain literal run. CX, DX and BX are free to be reused from here on.
// emitLiteral
LEAL -1(R10), CX
CMPL CX, $0x1d
JB one_byte_lz4_mz_emit_final
SUBL $0x1d, CX
CMPL CX, $0x00000100
JB two_bytes_lz4_mz_emit_final
CMPL CX, $0x00010000
JB three_bytes_lz4_mz_emit_final
MOVL CX, DX
SHRL $0x10, DX
MOVB $0xf8, (AX)
MOVW CX, 1(AX)
MOVB DL, 3(AX)
ADDQ $0x04, AX
ADDL $0x1d, CX
JMP memmove_long_lz4_mz_emit_final
three_bytes_lz4_mz_emit_final:
MOVB $0xf0, (AX)
MOVW CX, 1(AX)
ADDQ $0x03, AX
ADDL $0x1d, CX
JMP memmove_long_lz4_mz_emit_final
two_bytes_lz4_mz_emit_final:
MOVB $0xe8, (AX)
MOVB CL, 1(AX)
ADDL $0x1d, CX
ADDQ $0x02, AX
CMPL CX, $0x40
JB memmove_midlz4_mz_emit_final
JMP memmove_long_lz4_mz_emit_final
one_byte_lz4_mz_emit_final:
SHLB $0x03, CL
MOVB CL, (AX)
ADDQ $0x01, AX
LEAQ (AX)(R10*1), CX
MOVL R10, DX
// genMemMoveShort
// margin: 0, min move: 1
CMPQ DX, $0x03
JB emit_lit_memmove_lz4_mz_emit_final_memmove_move_1or2
JE emit_lit_memmove_lz4_mz_emit_final_memmove_move_3
CMPQ DX, $0x08
JBE emit_lit_memmove_lz4_mz_emit_final_memmove_move_4through8
CMPQ DX, $0x10
JBE emit_lit_memmove_lz4_mz_emit_final_memmove_move_8through16
CMPQ DX, $0x20
JBE emit_lit_memmove_lz4_mz_emit_final_memmove_move_17through32
JMP emit_lit_memmove_lz4_mz_emit_final_memmove_move_33through64
emit_lit_memmove_lz4_mz_emit_final_memmove_move_1or2:
MOVB (DI), BL
MOVB -1(DI)(DX*1), DI
MOVB BL, (AX)
MOVB DI, -1(AX)(DX*1)
JMP memmove_end_copy_lz4_mz_emit_final
emit_lit_memmove_lz4_mz_emit_final_memmove_move_3:
MOVW (DI), BX
MOVB 2(DI), DI
MOVW BX, (AX)
MOVB DI, 2(AX)
JMP memmove_end_copy_lz4_mz_emit_final
emit_lit_memmove_lz4_mz_emit_final_memmove_move_4through8:
MOVL (DI), BX
MOVL -4(DI)(DX*1), DI
MOVL BX, (AX)
MOVL DI, -4(AX)(DX*1)
JMP memmove_end_copy_lz4_mz_emit_final
emit_lit_memmove_lz4_mz_emit_final_memmove_move_8through16:
MOVQ (DI), BX
MOVQ -8(DI)(DX*1), DI
MOVQ BX, (AX)
MOVQ DI, -8(AX)(DX*1)
JMP memmove_end_copy_lz4_mz_emit_final
emit_lit_memmove_lz4_mz_emit_final_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(DX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(DX*1)
JMP memmove_end_copy_lz4_mz_emit_final
emit_lit_memmove_lz4_mz_emit_final_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(DX*1), X2
MOVOU -16(DI)(DX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
memmove_end_copy_lz4_mz_emit_final:
MOVQ CX, AX
JMP lz4_mz_done
memmove_midlz4_mz_emit_final:
LEAQ (AX)(R10*1), CX
MOVL R10, DX
// genMemMoveShort
// margin: 0, min move: 30
CMPQ DX, $0x20
JBE emit_lit_memmove_mid_lz4_mz_emit_final_memmove_move_17through32
JMP emit_lit_memmove_mid_lz4_mz_emit_final_memmove_move_33through64
emit_lit_memmove_mid_lz4_mz_emit_final_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(DX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(DX*1)
JMP memmove_mid_end_copy_lz4_mz_emit_final
emit_lit_memmove_mid_lz4_mz_emit_final_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(DX*1), X2
MOVOU -16(DI)(DX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
memmove_mid_end_copy_lz4_mz_emit_final:
MOVQ CX, AX
JMP lz4_mz_done
memmove_long_lz4_mz_emit_final:
LEAQ (AX)(R10*1), CX
MOVL R10, DX
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(DX*1), X2
MOVOU -16(DI)(DX*1), X3
MOVQ DX, R8
SHRQ $0x05, R8
MOVQ AX, BX
ANDL $0x0000001f, BX
MOVQ $0x00000040, R9
SUBQ BX, R9
DECQ R8
JA emit_lit_memmove_long_lz4_mz_emit_finallarge_forward_sse_loop_32
LEAQ -32(DI)(R9*1), BX
LEAQ -32(AX)(R9*1), R10
emit_lit_memmove_long_lz4_mz_emit_finallarge_big_loop_back:
MOVOU (BX), X4
MOVOU 16(BX), X5
MOVOA X4, (R10)
MOVOA X5, 16(R10)
ADDQ $0x20, R10
ADDQ $0x20, BX
ADDQ $0x20, R9
DECQ R8
JNA emit_lit_memmove_long_lz4_mz_emit_finallarge_big_loop_back
emit_lit_memmove_long_lz4_mz_emit_finallarge_forward_sse_loop_32:
MOVOU -32(DI)(R9*1), X4
MOVOU -16(DI)(R9*1), X5
MOVOA X4, -32(AX)(R9*1)
MOVOA X5, -16(AX)(R9*1)
ADDQ $0x20, R9
CMPQ DX, R9
JAE emit_lit_memmove_long_lz4_mz_emit_finallarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
MOVQ CX, AX
lz4_mz_done:
// Success: dstUsed = write cursor - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ SI, uncompressed+48(FP)
MOVQ AX, dstUsed+56(FP)
RET
lz4_mz_corrupt:
// Malformed input: return (-1, 0). Both results are stored so that dstUsed
// is never left undefined.
XORQ AX, AX
LEAQ -1(AX), SI
MOVQ SI, uncompressed+48(FP)
MOVQ AX, dstUsed+56(FP)
RET
lz4_mz_dstfull:
// Destination too small: return (-2, 0).
XORQ AX, AX
LEAQ -2(AX), SI
MOVQ SI, uncompressed+48(FP)
MOVQ AX, dstUsed+56(FP)
RET
// func decodeBlockAsm(dst []byte, src []byte) int
// Requires: CMOV, SSE2
TEXT ·decodeBlockAsm(SB), $8-56
MOVQ dst_base+0(FP), AX
MOVQ dst_len+8(FP), CX
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
MOVQ AX, SI
XORQ DI, DI
MOVQ DX, R8
MOVQ $0x00000001, R9
LEAQ (AX)(CX*1), AX
LEAQ (DX)(BX*1), CX
LEAQ -20(CX), DX
LEAQ -20(AX), BX
CMPQ R8, DX
JAE decodeBlockAsm_fast_end_copy
MOVBQZX (R8), R10
MOVQ R10, R11
SHRQ $0x02, R11
decodeBlockAsm_fast_loop_nofetch:
CMPQ SI, BX
JAE decodeBlockAsm_fast_end_copy
ANDQ $0x03, R10
JNZ decodeBlockAsm_fast_copy
decodeBlockAsm_fast_lits:
MOVL R11, R12
SHRL $0x01, R12
CMPL R12, $0x1d
JB decodeBlockAsm_fast_lit_0
JEQ decodeBlockAsm_fast_lit_1
CMPL R12, $0x1e
JEQ decodeBlockAsm_fast_lit_2
JMP decodeBlockAsm_fast_lit_3
decodeBlockAsm_fast_lit_0:
INCQ R8
INCL R12
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
BTL $0x00, R11
JC decodeBlockAsm_fast_copy_exec_short
LEAQ (R8)(R12*1), R10
CMPQ R10, CX
JA corrupt
// genMemMoveShort
// margin: 19, min move: 1
CMPQ R12, $0x10
JBE decodeBlockAsm_fast_lit_0_copy_memmove_move_8through16
CMPQ R12, $0x20
JBE decodeBlockAsm_fast_lit_0_copy_memmove_move_17through32
JMP decodeBlockAsm_fast_lit_0_copy_memmove_move_33through64
decodeBlockAsm_fast_lit_0_copy_memmove_move_8through16:
MOVOU (R8), X0
MOVOU X0, (SI)
JMP decodeBlockAsm_fast_litcopy_done
decodeBlockAsm_fast_lit_0_copy_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(R12*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_litcopy_done
decodeBlockAsm_fast_lit_0_copy_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(R12*1), X2
MOVOU -16(R8)(R12*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_litcopy_done
decodeBlockAsm_fast_lit_1:
MOVBQZX 1(R8), R12
ADDQ $0x02, R8
JMP decodeBlockAsm_fast_litcopy_long
decodeBlockAsm_fast_lit_2:
MOVWQZX 1(R8), R12
ADDQ $0x03, R8
JMP decodeBlockAsm_fast_litcopy_long
decodeBlockAsm_fast_lit_3:
MOVL (R8), R12
ADDQ $0x04, R8
SHRL $0x08, R12
decodeBlockAsm_fast_litcopy_long:
LEAQ 30(R12), R12
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
BTL $0x00, R11
JC decodeBlockAsm_fast_copy_exec
LEAQ (R8)(R12*1), R10
CMPQ R10, CX
JA corrupt
CMPL R12, $0x40
JBE decodeBlockAsm_fast_litcopy_short_reduced
// genMemMoveLong
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(R12*1), X2
MOVOU -16(R8)(R12*1), X3
MOVQ R12, R11
SHRQ $0x05, R11
MOVQ SI, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R13
SUBQ R10, R13
DECQ R11
JA decodeBlockAsm_fast_litcopy_longlarge_forward_sse_loop_32
LEAQ -32(R8)(R13*1), R10
LEAQ -32(SI)(R13*1), R14
decodeBlockAsm_fast_litcopy_longlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R10
ADDQ $0x20, R13
DECQ R11
JNA decodeBlockAsm_fast_litcopy_longlarge_big_loop_back
decodeBlockAsm_fast_litcopy_longlarge_forward_sse_loop_32:
MOVOU -32(R8)(R13*1), X4
MOVOU -16(R8)(R13*1), X5
MOVOA X4, -32(SI)(R13*1)
MOVOA X5, -16(SI)(R13*1)
ADDQ $0x20, R13
CMPQ R12, R13
JAE decodeBlockAsm_fast_litcopy_longlarge_forward_sse_loop_32
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_litcopy_done
decodeBlockAsm_fast_litcopy_short_reduced:
// genMemMoveShort
// margin: 16, min move: 30
CMPQ R12, $0x20
JBE decodeBlockAsm_fast_lit_longer_copy_memmove_move_17through32
JMP decodeBlockAsm_fast_lit_longer_copy_memmove_move_33through64
decodeBlockAsm_fast_lit_longer_copy_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(R12*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_litcopy_done
decodeBlockAsm_fast_lit_longer_copy_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(R12*1), X2
MOVOU -16(R8)(R12*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
decodeBlockAsm_fast_litcopy_done:
ADDQ R12, R8
ADDQ R12, SI
ADDQ R12, DI
CMPQ R8, DX
JAE decodeBlockAsm_fast_end_done
MOVBQZX (R8), R10
MOVQ R10, R11
SHRQ $0x02, R11
CMPQ SI, BX
JAE decodeBlockAsm_fast_end_done
ANDQ $0x03, R10
JZ decodeBlockAsm_fast_lits
decodeBlockAsm_fast_copy:
MOVL (R8), R13
CMPL R10, $0x02
JB decodeBlockAsm_fast_copy_1
JEQ decodeBlockAsm_fast_copy_2
JMP decodeBlockAsm_fast_copy_3
decodeBlockAsm_fast_copy_1:
MOVWQZX R13, R9
ADDQ $0x02, R8
MOVQ R11, R12
ANDL $0x0f, R12
SHRL $0x06, R9
INCL R9
SHRL $0x10, R13
LEAQ 1(R8), R10
MOVBLZX R13, R11
ADDL $0x04, R12
LEAL 18(R11), R11
CMPL R12, $0x13
CMOVLEQ R11, R12
CMOVQEQ R10, R8
JMP decodeBlockAsm_fast_copy_exec
decodeBlockAsm_fast_copy_2:
MOVQ R11, R12
CMPL R11, $0x3d
JB decodeBlockAsm_fast_copy_2_0_extra
JEQ decodeBlockAsm_fast_copy_2_1_extra
CMPL R12, $0x3f
JB decodeBlockAsm_fast_copy_2_2_extra
MOVWQZX 1(R8), R9
MOVL 2(R8), R12
ADDQ $0x06, R8
SHRL $0x08, R12
LEAL 64(R12), R12
ADDQ $0x40, R9
JMP decodeBlockAsm_fast_copy_exec_long_long
decodeBlockAsm_fast_copy_2_2_extra:
MOVWQZX 1(R8), R9
MOVWLZX 3(R8), R12
ADDQ $0x05, R8
LEAL 64(R12), R12
ADDQ $0x40, R9
JMP decodeBlockAsm_fast_copy_exec_long_long
decodeBlockAsm_fast_copy_2_1_extra:
MOVL R13, R12
SHRL $0x08, R13
SHRL $0x18, R12
MOVWQZX R13, R9
ADDQ $0x04, R8
LEAL 64(R12), R12
ADDQ $0x40, R9
JMP decodeBlockAsm_fast_copy_exec_long_long
decodeBlockAsm_fast_copy_2_0_extra:
SHRL $0x08, R13
MOVWQZX R13, R9
LEAQ 3(R8), R8
LEAL 4(R12), R12
ADDQ $0x40, R9
JMP decodeBlockAsm_fast_copy_short_no_ol
decodeBlockAsm_fast_copy_3:
MOVL R13, R9
ADDQ $0x04, R8
MOVQ R11, R10
SHRQ $0x01, R10
ANDQ $0x03, R10
BTL $0x00, R11
JC decodeBlockAsm_fast_copy3_read
SHRL $0x03, R11
ANDL $0x07, R11
LEAL 4(R11), R12
SHRL $0x08, R13
MOVWQZX R13, R9
DECQ R8
INCQ R10
MOVL (R8), R11
MOVL R11, (SI)
ADDQ $0x40, R9
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_fast_copy_short_no_ol
decodeBlockAsm_fast_copy3_read:
MOVL R9, R12
SHRL $0x05, R12
ANDL $0x3f, R12
SHRL $0x0b, R9
ADDL $0x00010000, R9
CMPL R12, $0x3d
JB decodeBlockAsm_fast_copy_3_0_extra
JEQ decodeBlockAsm_fast_copy_3_1_extra
CMPL R12, $0x3e
JEQ decodeBlockAsm_fast_copy_3_2_extra
MOVL -1(R8), R12
ADDQ $0x03, R8
SHRL $0x08, R12
LEAL 64(R12), R12
JMP decodeBlockAsm_fast_copy_fused_long
decodeBlockAsm_fast_copy_3_2_extra:
MOVWLZX (R8), R12
ADDQ $0x02, R8
LEAL 64(R12), R12
JMP decodeBlockAsm_fast_copy_fused_long
decodeBlockAsm_fast_copy_3_1_extra:
MOVBLZX (R8), R12
ADDQ $0x01, R8
LEAL 64(R12), R12
JMP decodeBlockAsm_fast_copy_fused_long
decodeBlockAsm_fast_copy_3_0_extra:
LEAL 4(R12), R12
MOVL (R8), R11
MOVL R11, (SI)
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_fast_copy_short_no_ol
decodeBlockAsm_fast_copy_fused_long:
MOVL (R8), R11
MOVL R11, (SI)
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_fast_copy_exec_long_long
decodeBlockAsm_fast_copy_exec_short:
CMPL R9, DI
JA corrupt
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
// Prefetch next tag
MOVBQZX (R8), R10
MOVQ SI, R11
SUBQ R9, R11
CMPL R9, R12
JB decodeBlockAsm_fast_copy_overlap
JMP decodeBlockAsm_fast_copy_short
decodeBlockAsm_fast_copy_exec_long_long:
MOVQ SI, R11
SUBQ R9, R11
CMPL R9, DI
JA corrupt
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
// Prefetch next tag
MOVBQZX (R8), R10
// genMemMoveLong
MOVQ R12, R13
SHRQ $0x05, R13
MOVQ SI, R14
MOVQ R12, R15
decodeBlockAsm_fast_copy_long_longlarge_big_loop_back:
MOVOU (R11), X0
MOVOU 16(R11), X1
MOVOU X0, (R14)
MOVOU X1, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
SUBQ $0x20, R15
DECQ R13
JNZ decodeBlockAsm_fast_copy_long_longlarge_big_loop_back
TESTQ R15, R15
JZ decodeBlockAsm_fast_copy_done
MOVOU -32(R11)(R15*1), X0
MOVOU -16(R11)(R15*1), X1
MOVOU X0, -32(R14)(R15*1)
MOVOU X1, -16(R14)(R15*1)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_short_no_ol:
MOVQ SI, R11
SUBQ R9, R11
CMPL R9, DI
JA corrupt
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
// Prefetch next tag
MOVBQZX (R8), R10
// genMemMoveShort
// margin: 16, min move: 4
CMPQ R12, $0x10
JBE decodeBlockAsm_fast_copy_short_no_ol_memmove_move_8through16
CMPQ R12, $0x20
JBE decodeBlockAsm_fast_copy_short_no_ol_memmove_move_17through32
JMP decodeBlockAsm_fast_copy_short_no_ol_memmove_move_33through64
decodeBlockAsm_fast_copy_short_no_ol_memmove_move_8through16:
MOVOU (R11), X0
MOVOU X0, (SI)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_short_no_ol_memmove_move_17through32:
MOVOU (R11), X0
MOVOU -16(R11)(R12*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_short_no_ol_memmove_move_33through64:
MOVOU (R11), X0
MOVOU 16(R11), X1
MOVOU -32(R11)(R12*1), X2
MOVOU -16(R11)(R12*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_exec:
CMPL R9, DI
JA corrupt
LEAQ (SI)(R12*1), R10
CMPQ R10, AX
JA corrupt
MOVQ SI, R11
SUBQ R9, R11
// Prefetch next tag
MOVBQZX (R8), R10
CMPL R9, R12
JB decodeBlockAsm_fast_copy_overlap
CMPL R12, $0x40
JA decodeBlockAsm_fast_copy_long
decodeBlockAsm_fast_copy_short:
// genMemMoveShort
// margin: 16, min move: 1
CMPQ R12, $0x10
JBE decodeBlockAsm_fast_copy_short_memmove_move_8through16
CMPQ R12, $0x20
JBE decodeBlockAsm_fast_copy_short_memmove_move_17through32
JMP decodeBlockAsm_fast_copy_short_memmove_move_33through64
decodeBlockAsm_fast_copy_short_memmove_move_8through16:
MOVOU (R11), X0
MOVOU X0, (SI)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_short_memmove_move_17through32:
MOVOU (R11), X0
MOVOU -16(R11)(R12*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_copy_done
decodeBlockAsm_fast_copy_short_memmove_move_33through64:
MOVOU (R11), X0
MOVOU 16(R11), X1
MOVOU -32(R11)(R12*1), X2
MOVOU -16(R11)(R12*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
JMP decodeBlockAsm_fast_copy_done
// decodeBlockAsm_fast_copy_long: copy R12 bytes from R11 to SI. The only
// branch to this label within view is taken when R12 > 64.
decodeBlockAsm_fast_copy_long:
// genMemMoveLong
// Load the first 32 and last 32 bytes of the source now; they are stored
// after the loop below.
MOVOU (R11), X0
MOVOU 16(R11), X1
MOVOU -32(R11)(R12*1), X2
MOVOU -16(R11)(R12*1), X3
// R14 = R12 / 32.
MOVQ R12, R14
SHRQ $0x05, R14
// R15 = 64 - (SI & 31), so -32(SI)(R15*1) is the first 32-byte boundary
// strictly above SI; the MOVOA stores below use that aligned address.
MOVQ SI, R13
ANDL $0x0000001f, R13
MOVQ $0x00000040, R15
SUBQ R13, R15
// DECQ updates ZF but leaves CF as produced by the SUBQ above; JA tests both.
DECQ R14
JA decodeBlockAsm_fast_copy_longlarge_forward_sse_loop_32
LEAQ -32(R11)(R15*1), R13
LEAQ -32(SI)(R15*1), BP
decodeBlockAsm_fast_copy_longlarge_big_loop_back:
// 32 bytes per iteration through running pointers R13 (source) and BP (dest).
MOVOU (R13), X4
MOVOU 16(R13), X5
MOVOA X4, (BP)
MOVOA X5, 16(BP)
ADDQ $0x20, BP
ADDQ $0x20, R13
ADDQ $0x20, R15
DECQ R14
JNA decodeBlockAsm_fast_copy_longlarge_big_loop_back
decodeBlockAsm_fast_copy_longlarge_forward_sse_loop_32:
// 32 bytes per iteration indexed by R15; repeats while R12 >= R15.
MOVOU -32(R11)(R15*1), X4
MOVOU -16(R11)(R15*1), X5
MOVOA X4, -32(SI)(R15*1)
MOVOA X5, -16(SI)(R15*1)
ADDQ $0x20, R15
CMPQ R12, R15
JAE decodeBlockAsm_fast_copy_longlarge_forward_sse_loop_32
// Store the head and tail captured before the loop (unaligned stores).
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(R12*1)
MOVOU X3, -16(SI)(R12*1)
decodeBlockAsm_fast_copy_done:
// Advance the output pointer and DI by the copied length.
ADDQ R12, SI
ADDQ R12, DI
// R11 = prefetched tag byte (R10) >> 2.
MOVQ R10, R11
SHRQ $0x02, R11
// Stay in the fast loop while R8 < DX.
// NOTE(review): DX is set before this view; presumably the fast-loop input limit - confirm.
CMPQ R8, DX
JB decodeBlockAsm_fast_loop_nofetch
JMP decodeBlockAsm_fast_end_copy
// decodeBlockAsm_fast_copy_overlap: copy where the offset R9 is smaller than
// the length R12, so the source (R11) overlaps the destination (SI).
// Dispatches on R9: above 3, equal to 3, equal to 2, otherwise the single-byte
// fill below. Each variant ends with the same epilogue: R11 = R10 >> 2, then
// continue the fast loop while R8 < DX.
decodeBlockAsm_fast_copy_overlap:
CMPL R9, $0x03
JA decodeBlockAsm_fast_copy_overlap_4
JE decodeBlockAsm_fast_copy_overlap_3
CMPL R9, $0x02
JE decodeBlockAsm_fast_copy_overlap_2
// Remaining case (R9 below 2): repeat the single byte at (R11) R12 times.
MOVB (R11), R11
ADDQ R12, DI
decodeBlockAsm_fast_loop_overlap_1:
MOVB R11, (SI)
INCQ SI
DECQ R12
JNZ decodeBlockAsm_fast_loop_overlap_1
MOVQ R10, R11
SHRQ $0x02, R11
CMPQ R8, DX
JB decodeBlockAsm_fast_loop_nofetch
JMP decodeBlockAsm_fast_end_copy
decodeBlockAsm_fast_copy_overlap_2:
// Offset 2: repeat a two-byte pattern.
MOVW (R11), R13
ADDQ R12, DI
// Odd length: emit one byte first, then reload the pattern from 1(R11),
// which now includes the byte just written, so the remaining count is even.
BTL $0x00, R12
JNC decodeBlockAsm_fast_loop_overlap_2
MOVB R13, (SI)
MOVW 1(R11), R13
INCQ SI
DECQ R12
decodeBlockAsm_fast_loop_overlap_2:
MOVW R13, (SI)
ADDQ $0x02, SI
SUBQ $0x02, R12
JNZ decodeBlockAsm_fast_loop_overlap_2
MOVQ R10, R11
SHRQ $0x02, R11
CMPQ R8, DX
JB decodeBlockAsm_fast_loop_nofetch
JMP decodeBlockAsm_fast_end_copy
decodeBlockAsm_fast_copy_overlap_3:
// Offset 3: load 4 bytes once, then store them with a stride of 3.
MOVL (R11), R13
ADDQ R12, DI
// Pre-subtract 3 so the loop stops before the final group.
SUBQ $0x03, R12
decodeBlockAsm_fast_loop_overlap_3:
MOVL R13, (SI)
ADDQ $0x03, SI
SUBQ $0x03, R12
JA decodeBlockAsm_fast_loop_overlap_3
// R12 is now zero or negative. Copy the last 3 bytes (word + byte) so they
// end exactly at SI+R12+3, then move SI there.
MOVW 3(R11)(R12*1), R13
MOVW R13, (SI)(R12*1)
MOVB 5(R11)(R12*1), R13
MOVB R13, 2(SI)(R12*1)
LEAQ 3(SI)(R12*1), SI
MOVQ R10, R11
SHRQ $0x02, R11
CMPQ R8, DX
JB decodeBlockAsm_fast_loop_nofetch
JMP decodeBlockAsm_fast_end_copy
decodeBlockAsm_fast_copy_overlap_4:
// Offset above 3: copy 4 bytes at a time, advancing both source and dest.
ADDQ R12, DI
// Pre-subtract 4 so the loop stops before the final group.
SUBQ $0x04, R12
decodeBlockAsm_fast_loop_overlap_4:
MOVL (R11), R13
ADDQ $0x04, R11
MOVL R13, (SI)
ADDQ $0x04, SI
SUBQ $0x04, R12
JA decodeBlockAsm_fast_loop_overlap_4
// R12 is now zero or negative. Copy the last 4 bytes so they end exactly at
// SI+R12+4, then move SI there.
MOVL (R11)(R12*1), R13
MOVL R13, (SI)(R12*1)
LEAQ 4(SI)(R12*1), SI
MOVQ R10, R11
SHRQ $0x02, R11
CMPQ R8, DX
// Not taken: fall through into the remainder loop that follows.
JB decodeBlockAsm_fast_loop_nofetch
// The fast loop exits here and continues in the remainder loop.
decodeBlockAsm_fast_end_copy:
decodeBlockAsm_fast_end_done:
// decodeBlockAsm_remain_loop: decode one operation per iteration.
// R8 is the input pointer and SI the output pointer (both are compared with
// the end of src/dst at function exit).
// NOTE(review): CX and AX are set before this view; they are used as the
// input and output limits - confirm they are the exact slice ends.
decodeBlockAsm_remain_loop:
// Stop when the input is exhausted.
CMPQ R8, CX
JAE decodeBlockAsm_remain_end_copy
// DX = tag byte, BX = tag >> 2.
MOVBQZX (R8), DX
MOVQ DX, BX
SHRQ $0x02, BX
// Stop when the output pointer has reached AX.
CMPQ SI, AX
JAE decodeBlockAsm_remain_end_copy
// Low two tag bits: zero selects the literal path, non-zero a copy.
ANDQ $0x03, DX
JNZ decodeBlockAsm_remain_copy
decodeBlockAsm_remain_lits:
// DX = BX >> 1 (tag >> 3). Values below 29 carry the length inline; 29, 30
// and anything higher select 1, 2 and 3 extra length bytes respectively.
MOVL BX, DX
SHRL $0x01, DX
CMPL DX, $0x1d
JB decodeBlockAsm_remain_lit_0
JEQ decodeBlockAsm_remain_lit_1
CMPL DX, $0x1e
JEQ decodeBlockAsm_remain_lit_2
JMP decodeBlockAsm_remain_lit_3
// decodeBlockAsm_remain_lit_0: one-byte tag with inline length.
// Length DX = (tag >> 3) + 1.
decodeBlockAsm_remain_lit_0:
INCQ R8
INCL DX
// Output bounds check: SI + DX must not exceed AX.
LEAQ (SI)(DX*1), R10
CMPQ R10, AX
JA corrupt
// Bit 0 of BX (tag bit 2) set: instead of literals, copy DX bytes using the
// offset currently held in R9.
BTL $0x00, BX
JC decodeBlockAsm_remain_copy_exec_short
// Input bounds check: R8 + DX must not exceed CX.
LEAQ (R8)(DX*1), BX
CMPQ BX, CX
JA corrupt
// genMemMoveShort
// margin: -1, min move: 1
// Size-class dispatch; each class moves exactly DX bytes using a head load
// and a tail load that may overlap, with loads done before stores.
CMPQ DX, $0x03
JB decodeBlockAsm_remain_lit_0_copy_memmove_move_1or2
JE decodeBlockAsm_remain_lit_0_copy_memmove_move_3
CMPQ DX, $0x08
JBE decodeBlockAsm_remain_lit_0_copy_memmove_move_4through8
CMPQ DX, $0x10
JBE decodeBlockAsm_remain_lit_0_copy_memmove_move_8through16
CMPQ DX, $0x20
JBE decodeBlockAsm_remain_lit_0_copy_memmove_move_17through32
JMP decodeBlockAsm_remain_lit_0_copy_memmove_move_33through64
decodeBlockAsm_remain_lit_0_copy_memmove_move_1or2:
// First byte and last byte (the same byte when DX == 1).
MOVB (R8), BL
MOVB -1(R8)(DX*1), R10
MOVB BL, (SI)
MOVB R10, -1(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_0_copy_memmove_move_3:
// Word plus one byte.
MOVW (R8), BX
MOVB 2(R8), R10
MOVW BX, (SI)
MOVB R10, 2(SI)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_0_copy_memmove_move_4through8:
// First 4 bytes and last 4 bytes.
MOVL (R8), BX
MOVL -4(R8)(DX*1), R10
MOVL BX, (SI)
MOVL R10, -4(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_0_copy_memmove_move_8through16:
// First 8 bytes and last 8 bytes.
MOVQ (R8), BX
MOVQ -8(R8)(DX*1), R10
MOVQ BX, (SI)
MOVQ R10, -8(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_0_copy_memmove_move_17through32:
// First 16 bytes and last 16 bytes.
MOVOU (R8), X0
MOVOU -16(R8)(DX*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_0_copy_memmove_move_33through64:
// First 32 bytes and last 32 bytes.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DX*1), X2
MOVOU -16(R8)(DX*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
// decodeBlockAsm_remain_lit_1/2/3: the length is stored in 1, 2 or 3 bytes
// after the tag. Each variant advances R8 past the tag and the length bytes,
// checks R8 against CX, and loads the raw length into DX.
decodeBlockAsm_remain_lit_1:
ADDQ $0x02, R8
CMPQ R8, CX
JA corrupt
MOVBQZX -1(R8), DX
JMP decodeBlockAsm_remain_litcopy_long
decodeBlockAsm_remain_lit_2:
ADDQ $0x03, R8
CMPQ R8, CX
JA corrupt
MOVWQZX -2(R8), DX
JMP decodeBlockAsm_remain_litcopy_long
decodeBlockAsm_remain_lit_3:
ADDQ $0x04, R8
CMPQ R8, CX
JA corrupt
// Load 4 bytes starting at the tag and shift the tag byte out, leaving the
// 24-bit length.
MOVL -4(R8), DX
SHRL $0x08, DX
decodeBlockAsm_remain_litcopy_long:
// Final length = stored value + 30.
LEAQ 30(DX), DX
// Output bounds check: SI + DX must not exceed AX.
LEAQ (SI)(DX*1), R10
CMPQ R10, AX
JA corrupt
// Bit 0 of BX (tag bit 2) set: instead of literals, copy DX bytes using the
// offset currently held in R9.
BTL $0x00, BX
JC decodeBlockAsm_remain_copy_exec
// Input bounds check: R8 + DX must not exceed CX.
LEAQ (R8)(DX*1), BX
CMPQ BX, CX
JA corrupt
CMPL DX, $0x40
JBE decodeBlockAsm_remain_litcopy_short_reduced
// genMemMoveLong
// More than 64 bytes. Load the first and last 32 bytes now; they are stored
// after the loop.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DX*1), X2
MOVOU -16(R8)(DX*1), X3
// R10 = DX / 32.
MOVQ DX, R10
SHRQ $0x05, R10
// R11 = 64 - (SI & 31), so -32(SI)(R11*1) is the first 32-byte boundary
// strictly above SI; the MOVOA stores below use that aligned address.
MOVQ SI, BX
ANDL $0x0000001f, BX
MOVQ $0x00000040, R11
SUBQ BX, R11
// DECQ updates ZF but leaves CF as produced by the SUBQ above; JA tests both.
DECQ R10
JA decodeBlockAsm_remain_litcopy_longlarge_forward_sse_loop_32
LEAQ -32(R8)(R11*1), BX
LEAQ -32(SI)(R11*1), R12
decodeBlockAsm_remain_litcopy_longlarge_big_loop_back:
// 32 bytes per iteration through running pointers BX (source) and R12 (dest).
MOVOU (BX), X4
MOVOU 16(BX), X5
MOVOA X4, (R12)
MOVOA X5, 16(R12)
ADDQ $0x20, R12
ADDQ $0x20, BX
ADDQ $0x20, R11
DECQ R10
JNA decodeBlockAsm_remain_litcopy_longlarge_big_loop_back
decodeBlockAsm_remain_litcopy_longlarge_forward_sse_loop_32:
// 32 bytes per iteration indexed by R11; repeats while DX >= R11.
MOVOU -32(R8)(R11*1), X4
MOVOU -16(R8)(R11*1), X5
MOVOA X4, -32(SI)(R11*1)
MOVOA X5, -16(SI)(R11*1)
ADDQ $0x20, R11
CMPQ DX, R11
JAE decodeBlockAsm_remain_litcopy_longlarge_forward_sse_loop_32
// Store the head and tail captured before the loop (unaligned stores).
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_litcopy_short_reduced:
// genMemMoveShort
// margin: -4, min move: 30
// DX is at least 30 here (30 was added above) and at most 64, so only the
// two largest size classes are needed.
CMPQ DX, $0x20
JBE decodeBlockAsm_remain_lit_longer_copy_memmove_move_17through32
JMP decodeBlockAsm_remain_lit_longer_copy_memmove_move_33through64
decodeBlockAsm_remain_lit_longer_copy_memmove_move_17through32:
// First 16 bytes and last 16 bytes.
MOVOU (R8), X0
MOVOU -16(R8)(DX*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_litcopy_done
decodeBlockAsm_remain_lit_longer_copy_memmove_move_33through64:
// First 32 bytes and last 32 bytes.
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DX*1), X2
MOVOU -16(R8)(DX*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
decodeBlockAsm_remain_litcopy_done:
// Literals consumed: advance input, output and DI by DX.
ADDQ DX, R8
ADDQ DX, SI
ADDQ DX, DI
// Inlined copy of the loop head: fetch and classify the next tag, going
// straight back to the literal path when its low two bits are zero.
CMPQ R8, CX
JAE decodeBlockAsm_remain_end_done
MOVBQZX (R8), DX
MOVQ DX, BX
SHRQ $0x02, BX
CMPQ SI, AX
JAE decodeBlockAsm_remain_end_done
ANDQ $0x03, DX
JZ decodeBlockAsm_remain_lits
// decodeBlockAsm_remain_copy: dispatch on the low two tag bits in DX
// (1, 2 or 3). BX holds tag >> 2. Each variant leaves the offset in R9 and
// the length in DX.
decodeBlockAsm_remain_copy:
CMPL DX, $0x02
JB decodeBlockAsm_remain_copy_1
JEQ decodeBlockAsm_remain_copy_2
JMP decodeBlockAsm_remain_copy_3
decodeBlockAsm_remain_copy_1:
// Two-byte operation: offset = (16-bit word at the tag >> 6) + 1,
// length field = BX & 15.
ADDQ $0x02, R8
CMPQ R8, CX
JA corrupt
MOVWQZX -2(R8), R9
MOVQ BX, DX
ANDL $0x0f, DX
SHRL $0x06, R9
INCL R9
// Length field 15: one extra byte follows and length = that byte + 18.
CMPL DX, $0x0f
JNE decodeBlockAsm_remain_copy_1_short
ADDQ $0x01, R8
CMPQ R8, CX
JA corrupt
MOVBLZX -1(R8), DX
LEAL 18(DX), DX
JMP decodeBlockAsm_remain_copy_exec
decodeBlockAsm_remain_copy_1_short:
// Otherwise length = field + 4.
LEAL 4(DX), DX
JMP decodeBlockAsm_remain_copy_exec_short
decodeBlockAsm_remain_copy_2:
// The 16-bit offset follows the tag and has 64 added to it. Length field is
// BX: below 61 it is inline; 61, 62 and 63 select 1, 2 and 3 extra length
// bytes placed after the offset.
MOVQ BX, DX
CMPL BX, $0x3d
JB decodeBlockAsm_remain_copy_2_0_extra
JEQ decodeBlockAsm_remain_copy_2_1_extra
CMPL DX, $0x3f
JB decodeBlockAsm_remain_copy_2_2_extra
// 3 extra bytes: tag + 2 offset bytes + 3 length bytes = 6 bytes.
ADDQ $0x06, R8
CMPQ R8, CX
JA corrupt
MOVWQZX -5(R8), R9
// Load 4 bytes starting at the last offset byte and shift it out, leaving
// the 24-bit length; then add 64.
MOVL -4(R8), DX
SHRL $0x08, DX
LEAL 64(DX), DX
ADDQ $0x40, R9
JMP decodeBlockAsm_remain_copy_exec_long_long
decodeBlockAsm_remain_copy_2_2_extra:
// 2 extra bytes: length = 16-bit value + 64.
ADDQ $0x05, R8
CMPQ R8, CX
JA corrupt
MOVWQZX -4(R8), R9
MOVWLZX -2(R8), DX
LEAL 64(DX), DX
ADDQ $0x40, R9
JMP decodeBlockAsm_remain_copy_exec_long_long
decodeBlockAsm_remain_copy_2_1_extra:
// 1 extra byte: length = byte + 64.
ADDQ $0x04, R8
CMPQ R8, CX
JA corrupt
MOVWQZX -3(R8), R9
MOVBLZX -1(R8), DX
LEAL 64(DX), DX
ADDQ $0x40, R9
JMP decodeBlockAsm_remain_copy_exec_long_long
decodeBlockAsm_remain_copy_2_0_extra:
// Inline length: three-byte operation, length = field + 4 (at most 64).
LEAQ 3(R8), R8
CMPQ R8, CX
JA corrupt
MOVWQZX -2(R8), R9
LEAL 4(DX), DX
ADDQ $0x40, R9
JMP decodeBlockAsm_remain_copy_short_no_ol
// decodeBlockAsm_remain_copy_3: tag type 3. Provisionally consumes 4 bytes
// and loads them (tag included) into R9. R10 = (BX >> 1) & 3 is a literal
// count field. Bit 0 of BX selects between the two encodings handled below.
decodeBlockAsm_remain_copy_3:
ADDQ $0x04, R8
CMPQ R8, CX
JA corrupt
MOVL -4(R8), R9
MOVQ BX, R10
SHRQ $0x01, R10
ANDQ $0x03, R10
BTL $0x00, BX
JC decodeBlockAsm_remain_copy3_read
// Bit clear: three-byte operation with a 16-bit offset, followed by
// literals. Length = ((BX >> 3) & 7) + 4; the offset is the word after the tag.
SHRL $0x03, BX
ANDL $0x07, BX
LEAL 4(BX), DX
MOVWQZX -3(R8), R9
// Only 3 bytes belong to this operation: step R8 back by one.
DECQ R8
// Literal count = field + 1 (1..4).
INCQ R10
// Bounds-check the literals against input (CX) and output (AX).
LEAQ (R8)(R10*1), BX
LEAQ (SI)(R10*1), R11
CMPQ BX, CX
JA corrupt
CMPQ R11, AX
JA corrupt
// genMemMoveVeryShort
// Copy R10 (1..4) literal bytes from R8 to SI.
CMPQ R10, $0x03
JE decodeBlockAsm_remain_copy2_fused_lits_move_3
JA decodeBlockAsm_remain_copy2_fused_lits_move_4
MOVB (R8), BL
MOVB -1(R8)(R10*1), R11
MOVB BL, (SI)
MOVB R11, -1(SI)(R10*1)
JMP decodeBlockAsm_remain_copy2_fused_lits_done
decodeBlockAsm_remain_copy2_fused_lits_move_3:
MOVW (R8), BX
MOVB 2(R8), R11
MOVW BX, (SI)
MOVB R11, 2(SI)
JMP decodeBlockAsm_remain_copy2_fused_lits_done
decodeBlockAsm_remain_copy2_fused_lits_move_4:
MOVL (R8), BX
MOVL BX, (SI)
decodeBlockAsm_remain_copy2_fused_lits_done:
// Offset gets 64 added; advance input, output and DI past the literals,
// then perform the copy.
ADDQ $0x40, R9
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_remain_copy_short_no_ol
decodeBlockAsm_remain_copy3_read:
// Bit set: four-byte operation. Length field = bits 5..10 of the 32-bit
// word, offset = (word >> 11) + 65536.
MOVL R9, DX
SHRL $0x05, DX
ANDL $0x3f, DX
SHRL $0x0b, R9
ADDL $0x00010000, R9
// Length field below 61 is inline; 61, 62 and anything higher select 1, 2
// and 3 extra length bytes.
CMPL DX, $0x3d
JB decodeBlockAsm_remain_copy_3_0_extra
JEQ decodeBlockAsm_remain_copy_3_1_extra
CMPL DX, $0x3e
JEQ decodeBlockAsm_remain_copy_3_2_extra
// 3 extra bytes: load 4 bytes ending at R8 and shift out the lowest one,
// leaving the 24-bit length; then add 64.
ADDQ $0x03, R8
CMPQ R8, CX
JA corrupt
MOVL -4(R8), DX
SHRL $0x08, DX
LEAL 64(DX), DX
JMP decodeBlockAsm_remain_copy_fused_long
decodeBlockAsm_remain_copy_3_2_extra:
// 2 extra bytes: length = 16-bit value + 64.
ADDQ $0x02, R8
CMPQ R8, CX
JA corrupt
MOVWLZX -2(R8), DX
LEAL 64(DX), DX
JMP decodeBlockAsm_remain_copy_fused_long
decodeBlockAsm_remain_copy_3_1_extra:
// 1 extra byte: length = byte + 64.
ADDQ $0x01, R8
CMPQ R8, CX
JA corrupt
MOVBLZX -1(R8), DX
LEAL 64(DX), DX
JMP decodeBlockAsm_remain_copy_fused_long
decodeBlockAsm_remain_copy_3_0_extra:
// Inline length: length = field + 4. With no literals (R10 == 0) go
// straight to the copy.
LEAL 4(DX), DX
TESTL R10, R10
JZ decodeBlockAsm_remain_copy_short_no_ol
// Bounds-check the R10 (1..3) literals against input (CX) and output (AX).
LEAQ (R8)(R10*1), BX
LEAQ (SI)(R10*1), R11
CMPQ BX, CX
JA corrupt
CMPQ R11, AX
JA corrupt
// genMemMoveVeryShort
CMPQ R10, $0x03
JE decodeBlockAsm_remain_copy3s_fused_lits_move_3
JA decodeBlockAsm_remain_copy3s_fused_lits_move_4
MOVB (R8), BL
MOVB -1(R8)(R10*1), R11
MOVB BL, (SI)
MOVB R11, -1(SI)(R10*1)
JMP decodeBlockAsm_remain_copy3s_fused_lits_done
decodeBlockAsm_remain_copy3s_fused_lits_move_3:
MOVW (R8), BX
MOVB 2(R8), R11
MOVW BX, (SI)
MOVB R11, 2(SI)
JMP decodeBlockAsm_remain_copy3s_fused_lits_done
decodeBlockAsm_remain_copy3s_fused_lits_move_4:
MOVL (R8), BX
MOVL BX, (SI)
decodeBlockAsm_remain_copy3s_fused_lits_done:
// Advance past the literals, then perform the copy.
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_remain_copy_short_no_ol
decodeBlockAsm_remain_copy_fused_long:
// Extended-length variant: same literal handling, but the copy is executed
// by the long path. With no literals (R10 == 0) go straight to the copy.
TESTL R10, R10
JZ decodeBlockAsm_remain_copy_exec_long_long
LEAQ (R8)(R10*1), BX
LEAQ (SI)(R10*1), R11
CMPQ BX, CX
JA corrupt
CMPQ R11, AX
JA corrupt
// genMemMoveVeryShort
CMPQ R10, $0x03
JE decodeBlockAsm_remain_copy3_fused_lits_move_3
JA decodeBlockAsm_remain_copy3_fused_lits_move_4
MOVB (R8), BL
MOVB -1(R8)(R10*1), R11
MOVB BL, (SI)
MOVB R11, -1(SI)(R10*1)
JMP decodeBlockAsm_remain_copy3_fused_lits_done
decodeBlockAsm_remain_copy3_fused_lits_move_3:
MOVW (R8), BX
MOVB 2(R8), R11
MOVW BX, (SI)
MOVB R11, 2(SI)
JMP decodeBlockAsm_remain_copy3_fused_lits_done
decodeBlockAsm_remain_copy3_fused_lits_move_4:
MOVL (R8), BX
MOVL BX, (SI)
decodeBlockAsm_remain_copy3_fused_lits_done:
// Advance past the literals, then perform the copy.
ADDQ R10, R8
ADDQ R10, SI
ADDQ R10, DI
JMP decodeBlockAsm_remain_copy_exec_long_long
// decodeBlockAsm_remain_copy_exec_short: execute a copy of DX bytes from
// offset R9. Jumps to corrupt when R9 > DI or SI + DX > AX. BX = SI - R9 is
// the source address. An offset smaller than the length takes the overlap
// path; otherwise the short memmove is used with no check for DX > 64.
// NOTE(review): the JMP skips the DX > 64 test that decodeBlockAsm_remain_copy_exec
// performs, so this assumes DX <= 64. That holds for the jumps visible here
// (inline lengths) - confirm for any caller outside this view.
decodeBlockAsm_remain_copy_exec_short:
CMPL R9, DI
JA corrupt
LEAQ (SI)(DX*1), BX
CMPQ BX, AX
JA corrupt
MOVQ SI, BX
SUBQ R9, BX
CMPL R9, DX
JB decodeBlockAsm_remain_copy_overlap
JMP decodeBlockAsm_remain_copy_short
// decodeBlockAsm_remain_copy_exec_long_long: copy DX bytes from BX = SI - R9
// in 32-byte steps. Every jump to this label within this view first adds 64
// to both DX and R9, so the loop count DX / 32 is non-zero and each 32-byte
// load is taken at least 64 bytes behind its store.
decodeBlockAsm_remain_copy_exec_long_long:
MOVQ SI, BX
SUBQ R9, BX
CMPL R9, DI
JA corrupt
LEAQ (SI)(DX*1), R10
CMPQ R10, AX
JA corrupt
// genMemMoveLong
// R10 = number of full 32-byte chunks, R11 = running destination,
// R12 = bytes remaining.
MOVQ DX, R10
SHRQ $0x05, R10
MOVQ SI, R11
MOVQ DX, R12
decodeBlockAsm_remain_copy_long_longlarge_big_loop_back:
MOVOU (BX), X0
MOVOU 16(BX), X1
MOVOU X0, (R11)
MOVOU X1, 16(R11)
ADDQ $0x20, R11
ADDQ $0x20, BX
SUBQ $0x20, R12
DECQ R10
JNZ decodeBlockAsm_remain_copy_long_longlarge_big_loop_back
// Nothing left when DX was a multiple of 32.
TESTQ R12, R12
JZ decodeBlockAsm_remain_copy_done
// Otherwise copy the 32 bytes that end exactly at the end of the range;
// this rewrites part of what the loop already copied.
MOVOU -32(BX)(R12*1), X0
MOVOU -16(BX)(R12*1), X1
MOVOU X0, -32(R11)(R12*1)
MOVOU X1, -16(R11)(R12*1)
JMP decodeBlockAsm_remain_copy_done
// decodeBlockAsm_remain_copy_short_no_ol: copy DX bytes from BX = SI - R9
// without testing for overlap. Every jump to this label within this view
// uses an offset that had at least 64 added and an inline length of at most 64.
decodeBlockAsm_remain_copy_short_no_ol:
MOVQ SI, BX
SUBQ R9, BX
CMPL R9, DI
JA corrupt
LEAQ (SI)(DX*1), R10
CMPQ R10, AX
JA corrupt
// genMemMoveShort
// margin: -4, min move: 4
// Size-class dispatch; each class loads head and tail before storing.
CMPQ DX, $0x08
JBE decodeBlockAsm_remain_copy_short_no_ol_memmove_move_4through8
CMPQ DX, $0x10
JBE decodeBlockAsm_remain_copy_short_no_ol_memmove_move_8through16
CMPQ DX, $0x20
JBE decodeBlockAsm_remain_copy_short_no_ol_memmove_move_17through32
JMP decodeBlockAsm_remain_copy_short_no_ol_memmove_move_33through64
decodeBlockAsm_remain_copy_short_no_ol_memmove_move_4through8:
// First 4 bytes and last 4 bytes (BX is reused for the tail value).
MOVL (BX), R10
MOVL -4(BX)(DX*1), BX
MOVL R10, (SI)
MOVL BX, -4(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_no_ol_memmove_move_8through16:
// First 8 bytes and last 8 bytes.
MOVQ (BX), R10
MOVQ -8(BX)(DX*1), BX
MOVQ R10, (SI)
MOVQ BX, -8(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_no_ol_memmove_move_17through32:
// First 16 bytes and last 16 bytes.
MOVOU (BX), X0
MOVOU -16(BX)(DX*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_no_ol_memmove_move_33through64:
// First 32 bytes and last 32 bytes.
MOVOU (BX), X0
MOVOU 16(BX), X1
MOVOU -32(BX)(DX*1), X2
MOVOU -16(BX)(DX*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
// decodeBlockAsm_remain_copy_exec: execute a copy of DX bytes from offset R9.
// Jumps to corrupt when R9 > DI or SI + DX > AX. BX = SI - R9 is the source
// address. An offset smaller than the length takes the byte-wise overlap
// path; lengths above 64 take the looped copy; the rest use the short memmove.
decodeBlockAsm_remain_copy_exec:
CMPL R9, DI
JA corrupt
LEAQ (SI)(DX*1), BX
CMPQ BX, AX
JA corrupt
MOVQ SI, BX
SUBQ R9, BX
CMPL R9, DX
JB decodeBlockAsm_remain_copy_overlap
CMPL DX, $0x40
JA decodeBlockAsm_remain_copy_long
decodeBlockAsm_remain_copy_short:
// genMemMoveShort
// margin: -4, min move: 1
// Size-class dispatch; each class moves exactly DX bytes using a head load
// and a tail load that may overlap, with loads done before stores.
CMPQ DX, $0x03
JB decodeBlockAsm_remain_copy_short_memmove_move_1or2
JE decodeBlockAsm_remain_copy_short_memmove_move_3
CMPQ DX, $0x08
JBE decodeBlockAsm_remain_copy_short_memmove_move_4through8
CMPQ DX, $0x10
JBE decodeBlockAsm_remain_copy_short_memmove_move_8through16
CMPQ DX, $0x20
JBE decodeBlockAsm_remain_copy_short_memmove_move_17through32
JMP decodeBlockAsm_remain_copy_short_memmove_move_33through64
decodeBlockAsm_remain_copy_short_memmove_move_1or2:
// First byte and last byte (the same byte when DX == 1).
MOVB (BX), R10
MOVB -1(BX)(DX*1), BL
MOVB R10, (SI)
MOVB BL, -1(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_memmove_move_3:
// Word plus one byte.
MOVW (BX), R10
MOVB 2(BX), BL
MOVW R10, (SI)
MOVB BL, 2(SI)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_memmove_move_4through8:
// First 4 bytes and last 4 bytes.
MOVL (BX), R10
MOVL -4(BX)(DX*1), BX
MOVL R10, (SI)
MOVL BX, -4(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_memmove_move_8through16:
// First 8 bytes and last 8 bytes.
MOVQ (BX), R10
MOVQ -8(BX)(DX*1), BX
MOVQ R10, (SI)
MOVQ BX, -8(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_memmove_move_17through32:
// First 16 bytes and last 16 bytes.
MOVOU (BX), X0
MOVOU -16(BX)(DX*1), X1
MOVOU X0, (SI)
MOVOU X1, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_short_memmove_move_33through64:
// First 32 bytes and last 32 bytes.
MOVOU (BX), X0
MOVOU 16(BX), X1
MOVOU -32(BX)(DX*1), X2
MOVOU -16(BX)(DX*1), X3
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
JMP decodeBlockAsm_remain_copy_done
decodeBlockAsm_remain_copy_long:
// genMemMoveLong
// More than 64 bytes. Load the first and last 32 bytes now; they are stored
// after the loop.
MOVOU (BX), X0
MOVOU 16(BX), X1
MOVOU -32(BX)(DX*1), X2
MOVOU -16(BX)(DX*1), X3
// R11 = DX / 32.
MOVQ DX, R11
SHRQ $0x05, R11
// R12 = 64 - (SI & 31), so -32(SI)(R12*1) is the first 32-byte boundary
// strictly above SI; the MOVOA stores below use that aligned address.
MOVQ SI, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// DECQ updates ZF but leaves CF as produced by the SUBQ above; JA tests both.
DECQ R11
JA decodeBlockAsm_remain_copy_longlarge_forward_sse_loop_32
LEAQ -32(BX)(R12*1), R10
LEAQ -32(SI)(R12*1), R13
decodeBlockAsm_remain_copy_longlarge_big_loop_back:
// 32 bytes per iteration through running pointers R10 (source) and R13 (dest).
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA decodeBlockAsm_remain_copy_longlarge_big_loop_back
decodeBlockAsm_remain_copy_longlarge_forward_sse_loop_32:
// 32 bytes per iteration indexed by R12; repeats while DX >= R12.
MOVOU -32(BX)(R12*1), X4
MOVOU -16(BX)(R12*1), X5
MOVOA X4, -32(SI)(R12*1)
MOVOA X5, -16(SI)(R12*1)
ADDQ $0x20, R12
CMPQ DX, R12
JAE decodeBlockAsm_remain_copy_longlarge_forward_sse_loop_32
// Store the head and tail captured before the loop (unaligned stores).
MOVOU X0, (SI)
MOVOU X1, 16(SI)
MOVOU X2, -32(SI)(DX*1)
MOVOU X3, -16(SI)(DX*1)
decodeBlockAsm_remain_copy_done:
// Advance the output pointer and DI by the copied length, then decode the
// next operation.
ADDQ DX, SI
ADDQ DX, DI
JMP decodeBlockAsm_remain_loop
decodeBlockAsm_remain_copy_overlap:
// Overlapping copy: DI is advanced first because DX is consumed as the loop
// counter; bytes are then copied one at a time in ascending order.
ADDQ DX, DI
decodeBlockAsm_remain_copy_overlap_simple:
MOVB (BX), R10
MOVB R10, (SI)
INCQ BX
INCQ SI
DECQ DX
JNZ decodeBlockAsm_remain_copy_overlap_simple
JMP decodeBlockAsm_remain_loop
// Function exit. Decoding succeeded only if the output pointer SI is exactly
// dst_base + dst_len and the input pointer R8 is exactly src_base + src_len;
// then 0 is returned. Any mismatch falls into the corrupt path.
decodeBlockAsm_remain_end_copy:
decodeBlockAsm_remain_end_done:
MOVQ src_base+24(FP), AX
MOVQ src_len+32(FP), CX
MOVQ dst_base+0(FP), DX
MOVQ dst_len+8(FP), BX
// DX = end of dst, AX = end of src.
LEAQ (DX)(BX*1), DX
LEAQ (AX)(CX*1), AX
CMPQ SI, DX
JNE corrupt
CMPQ R8, AX
JNE corrupt
MOVQ $0x00000000, ret+48(FP)
RET
// corrupt: shared failure exit for every bounds/offset check in this
// function; returns 1.
corrupt:
MOVQ $0x00000001, ret+48(FP)
RET