
// ARM assembly implementation of decodeBlockNodict (Go assembler syntax).

// Build constraints: assemble this file only with the gc toolchain and
// only when the noasm tag is not set. The constraint lines must be
// followed by a blank line, otherwise the go tool does not treat them as
// constraints.
// +build gc
// +build !noasm

// textflag.h supplies the NOFRAME and NOSPLIT flags used by the TEXT
// directive below.
#include "textflag.h"

// Register allocation.
// Every name below is a preprocessor alias for a fixed ARM register (or,
// for minMatch, an immediate) used by decodeBlockNodict.
//
// Output write cursor; starts at dst_base and is advanced by the
// post-incrementing stores.
#define dst R0
// Copy of dst taken at entry. Used to compute the number of bytes written
// and as the lower bound for match addresses.
#define dstorig R1
// Input read cursor; starts at src_base and is advanced by the
// post-incrementing loads.
#define src R2
// One past the last byte of dst (dst_base + dst_len).
#define dstend R3
// One past the last byte of src (src_base + src_len).
#define srcend R4
#define match R5 // Match address.
// Token byte: the high nibble seeds the literal length, the low nibble
// seeds the match length.
#define token R6
#define len R7 // Literal and match lengths.
// Shares R6 with token: the low nibble of token is moved into len before
// offset is loaded, so the two are never live at the same time.
#define offset R6 // Match offset; overlaps with token.
// Scratch registers.
#define tmp1 R8
#define tmp2 R9
#define tmp3 R12
// Immediate added to the encoded match length in the destination bounds
// check; the match copy loop relies on every match being at least this long.
#define minMatch $4
// func decodeBlockNodict(dst, src []byte) int
//
// Decodes the compressed block in src into dst and returns the number of
// bytes written to dst, or -1 if the input is empty, truncated or
// malformed, or if the output would not fit in dst.
//
// The input is a series of sequences. Each sequence consists of:
//   - a token byte: high nibble = literal length, low nibble = match length;
//   - if the literal length nibble is 15, extra length bytes that are added
//     to it, continuing for as long as the byte read is 255;
//   - the literal bytes, copied verbatim to dst;
//   - a 2-byte little-endian match offset (0 is rejected as corrupt);
//   - if the match length nibble is 15, extra length bytes as above;
//   - the match: len+minMatch bytes copied from dst-offset to dst.
// The input may end right after the literals of the last sequence.
// (This layout matches the LZ4 block format.)
//
// Stack layout of the arguments (32-bit, 28 bytes in total):
//   dst base/len/cap at 0/4/8(FP), src base/len/cap at 12/16/20(FP),
//   result at 24(FP).
//
// Lengths are accumulated with overflow detection, and the pointer sums in
// the bounds checks are checked for 32-bit wrap-around, so that oversized
// lengths in the input cannot slip past the bounds checks.
TEXT ·decodeBlockNodict(SB), NOFRAME|NOSPLIT, $-4-28
	MOVW dst_base+0(FP), dst
	MOVW dst_len+4(FP), dstend
	MOVW src_base+12(FP), src
	MOVW src_len+16(FP), srcend

	// An empty source is an error.
	CMP $0, srcend
	BEQ shortSrc

	// Turn the two lengths into end pointers.
	ADD dst, dstend
	ADD src, srcend

	// Remember the start of the output for the return value and for
	// validating match addresses.
	MOVW dst, dstorig

loop:
	// Read token. Extract literal length.
	MOVBU.P 1(src), token
	MOVW    token >> 4, len
	CMP     $15, len
	BNE     readLitlenDone

readLitlenLoop:
	// Add extension bytes to len until one is not 255.
	CMP     src, srcend
	BEQ     shortSrc
	MOVBU.P 1(src), tmp1
	ADD.S   tmp1, len
	BVS     shortDst // len would reach 2^31: cannot fit in any buffer.
	CMP     $255, tmp1
	BEQ     readLitlenLoop

readLitlenDone:
	CMP $0, len
	BEQ copyLiteralDone

	// Bounds check dst+len and src+len.
	// The carry checks reject sums that wrap around the address space.
	// The second CMP only runs if the first one passed, so the single
	// BHI below covers both comparisons.
	ADD.S    dst, len, tmp1
	ADD.CC.S src, len, tmp2
	BCS      shortSrc
	CMP      dstend, tmp1
	//BHI    shortDst // Uncomment for distinct error codes.
	CMP.LS   srcend, tmp2
	BHI      shortSrc

	// Copy literal.
	CMP $4, len
	BLO copyLiteralFinish

	// Copy 0-3 bytes until src is aligned.
	// First a single byte if src is odd ...
	TST        $1, src
	MOVBU.NE.P 1(src), tmp1
	MOVB.NE.P  tmp1, 1(dst)
	SUB.NE     $1, len

	// ... then a halfword (stored as two bytes, low byte first) if src
	// is not yet a multiple of four.
	TST        $2, src
	MOVHU.NE.P 2(src), tmp2
	MOVB.NE.P  tmp2, 1(dst)
	MOVW.NE    tmp2 >> 8, tmp1
	MOVB.NE.P  tmp1, 1(dst)
	SUB.NE     $2, len

	// len may have dropped below 4, so enter the loop at its condition.
	B copyLiteralLoopCond

copyLiteralLoop:
	// Aligned load, unaligned write.
	// The word is stored byte by byte, lowest byte at the lowest address.
	MOVW.P 4(src), tmp1
	MOVW   tmp1 >> 8, tmp2
	MOVB   tmp2, 1(dst)
	MOVW   tmp1 >> 16, tmp3
	MOVB   tmp3, 2(dst)
	MOVW   tmp1 >> 24, tmp2
	MOVB   tmp2, 3(dst)
	MOVB.P tmp1, 4(dst)

copyLiteralLoopCond:
	// Loop until len-4 < 0.
	SUB.S $4, len
	BPL   copyLiteralLoop

	// Restore len, which is now negative.
	ADD $4, len

copyLiteralFinish:
	// Copy remaining 0-3 bytes.
	TST        $2, len
	MOVHU.NE.P 2(src), tmp2
	MOVB.NE.P  tmp2, 1(dst)
	MOVW.NE    tmp2 >> 8, tmp1
	MOVB.NE.P  tmp1, 1(dst)
	TST        $1, len
	MOVBU.NE.P 1(src), tmp1
	MOVB.NE.P  tmp1, 1(dst)

copyLiteralDone:
	// Input exhausted after the literals: successful end of block.
	CMP src, srcend
	BEQ end

	// Initial part of match length.
	// This frees up the token register for reuse as offset.
	AND $15, token, len

	// Read offset.
	// src is advanced first so that a single compare checks both bytes.
	ADD   $2, src
	CMP   srcend, src
	BHI   shortSrc
	MOVBU -2(src), offset
	MOVBU -1(src), tmp1
	ORR   tmp1 << 8, offset
	CMP   $0, offset
	BEQ   corrupt

	// Read rest of match length.
	CMP $15, len
	BNE readMatchlenDone

readMatchlenLoop:
	// Add extension bytes to len until one is not 255.
	CMP     src, srcend
	BEQ     shortSrc
	MOVBU.P 1(src), tmp1
	ADD.S   tmp1, len
	BVS     shortDst // len would reach 2^31: cannot fit in any buffer.
	CMP     $255, tmp1
	BEQ     readMatchlenLoop

readMatchlenDone:
	// Bounds check dst+len+minMatch and match = dst-offset.
	// The carry checks reject sums that wrap around the address space.
	// The second CMP only runs if the first one passed, so the single
	// BHI below covers both comparisons.
	ADD.S    dst, len, tmp1
	ADD.CC.S minMatch, tmp1
	BCS      shortDst
	CMP      dstend, tmp1
	//BHI    shortDst // Uncomment for distinct error codes.
	SUB      offset, dst, match
	CMP.LS   match, dstorig
	BHI      corrupt

	// Since len+minMatch is at least four, we can do a 4× unrolled
	// byte copy loop. Using MOVW instead of four byte loads is faster,
	// but to remain portable we'd have to align match first, which is
	// too expensive. By alternating loads and stores, we also handle
	// the case offset < 4.

copyMatch4:
	// The flags set by SUB.S survive the byte moves and drive the BPL.
	SUB.S   $4, len
	MOVBU.P 4(match), tmp1
	MOVB.P  tmp1, 4(dst)
	MOVBU   -3(match), tmp2
	MOVB    tmp2, -3(dst)
	MOVBU   -2(match), tmp3
	MOVB    tmp3, -2(dst)
	MOVBU   -1(match), tmp1
	MOVB    tmp1, -1(dst)
	BPL     copyMatch4

	// Restore len, which is now negative.
	ADD.S $4, len
	BEQ   copyMatchDone

copyMatch:
	// Finish with a byte-at-a-time copy.
	SUB.S   $1, len
	MOVBU.P 1(match), tmp2
	MOVB.P  tmp2, 1(dst)
	BNE     copyMatch

copyMatchDone:
	// Continue with the next sequence unless the input is exhausted.
	CMP src, srcend
	BNE loop

end:
	// Success: return the number of bytes written.
	SUB  dstorig, dst, tmp1
	MOVW tmp1, ret+24(FP)
	RET

	// The three error cases have distinct labels so we can put different
	// return codes here when debugging, or if the error returns need to
	// be changed.
shortDst:
shortSrc:
corrupt:
	MOVW $-1, tmp1
	MOVW tmp1, ret+24(FP)
	RET