mattermost-community-enterp.../vendor/github.com/andybalholm/brotli/matchfinder/pathfinder.go
Claude ec1f89217a Merge: Complete Mattermost Server with Community Enterprise
Full Mattermost server source with integrated Community Enterprise features.
Includes vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online):
  go build ./cmd/mattermost

Build (offline/air-gapped):
  go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 23:59:07 +09:00

329 lines
8.6 KiB
Go

package matchfinder
import (
"encoding/binary"
"math"
"math/bits"
"slices"
)
// Pathfinder is a MatchFinder that locates candidate matches with hash
// chains and then runs a shortest-path optimization over the block to decide
// which of those matches (and which literal runs) to emit.
//
// Zero-valued MaxDistance, MinLength, HashLen and TableBits are replaced by
// their defaults on the first call to FindMatches.
type Pathfinder struct {
	// MaxDistance is how far back (in bytes) a match may refer.
	// Default: 65535.
	MaxDistance int

	// MinLength is the length of the shortest match to return.
	// Default: 4.
	MinLength int

	// HashLen is how many bytes are hashed at each position.
	// Maximum: 8. Default: 6.
	HashLen int

	// TableBits is the width, in bits, of a hash table index.
	// Default: 17 (128K entries).
	TableBits int

	// ChainLength is the number of additional, older locations with the same
	// hash to try on the "match chain" for each position. There is no
	// default: with zero, only the most recent earlier location is tried.
	ChainLength int

	// table maps a hash to the most recent position in history that produced
	// it (0 means "no entry"). chain holds, for each position in history, the
	// distance back to the previous position with the same hash (0 means the
	// chain ends there).
	table, chain []uint32

	// history is the retained earlier input followed by the current block.
	history []byte

	// Scratch buffers kept between calls so they need not be reallocated.
	arrivals     []arrival
	foundMatches []absoluteMatch
	matches      []Match
}
// Reset discards all state carried over from previous calls to FindMatches:
// the hash table is zeroed and the history and chain buffers are emptied,
// while their backing storage is kept for reuse.
func (q *Pathfinder) Reset() {
	clear(q.table)
	q.chain = q.chain[:0]
	q.history = q.history[:0]
}
// An arrival records the cheapest known way of reaching a byte position.
//
// cost is the accumulated cost of getting there from the start of the block.
// A nonzero distance means the position was reached by a match of the given
// length at that distance; a zero distance means it was reached by a run of
// length literals.
type arrival struct {
	length, distance uint32
	cost             float32
}
// baseMatchCost is the fixed part of the estimated cost of emitting a match;
// FindMatches adds further terms for the preceding literal run, the distance
// and short lengths on top of it.
const baseMatchCost float32 = 4
// FindMatches analyzes src, appends the chosen sequence of matches to dst and
// returns the extended slice.
//
// Any of MaxDistance, MinLength, HashLen and TableBits that are still zero are
// set to their defaults (65535, 4, 6 and 17) before use. src is appended to
// the internal history buffer, so matches may refer to data from earlier
// calls; call Reset to drop that history.
//
// The work is done in stages:
//  1. estimate the cost of each literal byte value from a histogram of src;
//  2. hash every position of src and link positions with equal hashes;
//  3. collect candidate matches by following those links;
//  4. run a shortest-path pass over the positions of src, where each step is
//     either one more literal or one of the candidate matches;
//  5. walk the cheapest path backward and convert it to Match values.
func (q *Pathfinder) FindMatches(dst []Match, src []byte) []Match {
	if q.MaxDistance == 0 {
		q.MaxDistance = 65535
	}
	if q.MinLength == 0 {
		q.MinLength = 4
	}
	if q.HashLen == 0 {
		q.HashLen = 6
	}
	if q.TableBits == 0 {
		q.TableBits = 17
	}
	if len(q.table) < 1<<q.TableBits {
		q.table = make([]uint32, 1<<q.TableBits)
	}

	// Estimate the cost of a literal as log2(len(src)/count) for its byte
	// value, but never less than 1. Entries for byte values that do not occur
	// in src are never read.
	var histogram [256]uint32
	for _, b := range src {
		histogram[b]++
	}
	var byteCost [256]float32
	for b, n := range histogram {
		cost := max(math.Log2(float64(len(src))/float64(n)), 1)
		byteCost[b] = float32(cost)
	}

	// Each element in arrivals corresponds to the position just after
	// the corresponding byte in src. A cost of zero marks an element that
	// has not been reached yet.
	arrivals := q.arrivals
	if len(arrivals) < len(src) {
		arrivals = make([]arrival, len(src))
		q.arrivals = arrivals
	} else {
		arrivals = arrivals[:len(src)]
		clear(arrivals)
	}

	if len(q.history) > q.MaxDistance*2 {
		// Trim down the history buffer, keeping the last MaxDistance bytes.
		delta := len(q.history) - q.MaxDistance
		copy(q.history, q.history[delta:])
		q.history = q.history[:q.MaxDistance]
		// chain is indexed by position, so it has to move together with
		// history; otherwise the retained positions would be paired with
		// links that belonged to unrelated, discarded positions.
		copy(q.chain, q.chain[delta:])
		q.chain = q.chain[:q.MaxDistance]

		// Rebase the table onto the new coordinates. Positions that fell off
		// the front become 0, which means "no entry".
		for i, v := range q.table {
			newV := max(int(v)-delta, 0)
			q.table[i] = uint32(newV)
		}
	}

	// Append src to the history buffer. From here on src is the whole
	// history, and the new data starts at historyLen.
	historyLen := len(q.history)
	q.history = append(q.history, src...)
	q.chain = append(q.chain, make([]uint32, len(src))...)
	src = q.history

	// Calculate hashes and build the chain. The loop stops 7 bytes short of
	// the end because each hash reads 8 bytes.
	for i := historyLen; i < len(src)-7; i++ {
		h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - q.TableBits)
		candidate := int(q.table[h])
		q.table[h] = uint32(i)
		if candidate != 0 {
			delta := i - candidate
			q.chain[i] = uint32(delta)
		}
	}

	// Look for matches, and collect them in foundMatches. Later we'll figure out
	// which ones to actually use.
	foundMatches := q.foundMatches[:0]
	var prevMatch absoluteMatch
	i := historyLen
	for i < len(src)-7 {
		delta := q.chain[i]
		if delta == 0 {
			i++
			continue
		}
		candidate := i - int(delta)
		if candidate <= 0 || i-candidate > q.MaxDistance {
			i++
			continue
		}

		var currentMatch absoluteMatch

		if i >= prevMatch.End && prevMatch != (absoluteMatch{}) {
			// Look for a repeat match at i+1.
			prevDistance := prevMatch.Start - prevMatch.Match
			if binary.LittleEndian.Uint32(src[i+1:]) == binary.LittleEndian.Uint32(src[i+1-prevDistance:]) {
				m := extendMatch2(src, i+1, i+1-prevDistance, i+1)
				if m.End-m.Start > q.MinLength {
					currentMatch = m
					foundMatches = append(foundMatches, m)
				}
			}
		}

		// Try the most recent earlier position with the same hash.
		if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
			m := extendMatch2(src, i, candidate, max(historyLen, prevMatch.Start))
			if m.End-m.Start > q.MinLength {
				currentMatch = m
				foundMatches = append(foundMatches, m)
			}
		}

		// Then follow the chain to older positions, keeping only matches that
		// are longer than the best one found so far.
		for range q.ChainLength {
			delta := q.chain[candidate]
			if delta == 0 {
				break
			}
			candidate -= int(delta)
			if candidate <= 0 || i-candidate > q.MaxDistance {
				break
			}
			if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
				m := extendMatch2(src, i, candidate, max(historyLen, prevMatch.Start))
				if m.End-m.Start > q.MinLength && m.End-m.Start > currentMatch.End-currentMatch.Start {
					currentMatch = m
					foundMatches = append(foundMatches, m)
				}
			}
		}

		if i < prevMatch.End && currentMatch.End-currentMatch.Start <= prevMatch.End-prevMatch.Start {
			// We were looking for an overlapping match, but we didn't find one longer
			// than the previous match. So we'll go back to sequential search,
			// starting right after the previous match.
			i = prevMatch.End
			continue
		}

		if currentMatch == (absoluteMatch{}) {
			// No match found. Continue with sequential search.
			i++
			continue
		}

		// We've found a match; now look for matches overlapping the end of it.
		prevMatch = currentMatch
		i = currentMatch.End + 2 - q.HashLen
	}
	q.foundMatches = foundMatches

	// The shortest-path pass below consumes matches in order of start position.
	slices.SortFunc(foundMatches, func(a, b absoluteMatch) int { return a.Start - b.Start })
	matchIndex := 0
	// pending is the match seen so far that reaches furthest forward.
	var pending absoluteMatch

	for i := historyLen; i < len(src); i++ {
		// arrivedHere is the best way of reaching position i; it stays zero
		// at the very start of the block.
		var arrivedHere arrival
		if i > historyLen {
			arrivedHere = arrivals[i-historyLen-1]
		}
		// unmatched is the length of the literal run ending at i, and
		// prevDistance is the distance of the match just before that run.
		unmatched := 0
		if arrivedHere.distance == 0 {
			unmatched = int(arrivedHere.length)
		}
		prevDistance := 0
		if i-unmatched > historyLen {
			prevDistance = int(arrivals[i-historyLen-1-unmatched].distance)
		}

		// Option 1: emit src[i] as a literal.
		literalCost := byteCost[src[i]]
		nextArrival := &arrivals[i-historyLen]
		if nextArrival.cost == 0 || arrivedHere.cost+literalCost < nextArrival.cost {
			*nextArrival = arrival{
				cost:   arrivedHere.cost + literalCost,
				length: uint32(unmatched + 1),
			}
		}

		// Option 2: use any prefix (of at least MinLength bytes) of a match
		// that starts here.
		for matchIndex < len(foundMatches) && foundMatches[matchIndex].Start == i {
			m := foundMatches[matchIndex]
			matchIndex++
			if m.End > pending.End {
				pending = m
			}
			matchCost := baseMatchCost + float32(bits.Len(uint(unmatched)))
			// Reusing the previous match's distance costs nothing extra.
			if m.Start-m.Match != prevDistance {
				matchCost += float32(bits.Len(uint(m.Start - m.Match)))
			}
			for j := m.Start + q.MinLength; j <= m.End; j++ {
				adjustedCost := matchCost
				if j-m.Start < 6 {
					// Matches shorter than 6 are comparatively rare, and therefore
					// have longer codes.
					adjustedCost += float32(6-(j-m.Start)) * 2
				}
				a := &arrivals[j-historyLen-1]
				if a.cost == 0 || arrivedHere.cost+adjustedCost < a.cost {
					*a = arrival{
						length:   uint32(j - m.Start),
						distance: uint32(m.Start - m.Match),
						cost:     arrivedHere.cost + adjustedCost,
					}
				}
			}
		}

		// If a match from an earlier position extends far enough past the current
		// position, try using the tail of it, starting from here.
		if unmatched == 0 && pending.Start != i && pending.End >= i+q.MinLength &&
			!(arrivedHere.length != 0 && arrivedHere.distance == uint32(pending.Start-pending.Match)) {
			matchCost := baseMatchCost + float32(bits.Len(uint(pending.Start-pending.Match)))
			for j := i + q.MinLength; j <= pending.End; j++ {
				adjustedCost := matchCost
				if j-i < 6 {
					// Matches shorter than 6 are comparatively rare, and therefore
					// have longer codes.
					adjustedCost += float32(6-(j-i)) * 2
				}
				a := &arrivals[j-historyLen-1]
				if a.cost == 0 || arrivedHere.cost+adjustedCost < a.cost {
					*a = arrival{
						length:   uint32(j - i),
						distance: uint32(pending.Start - pending.Match),
						cost:     arrivedHere.cost + adjustedCost,
					}
				}
			}
		}
	}

	// We've found the shortest path; now walk it backward and store the matches.
	matches := q.matches[:0]
	i = len(arrivals) - 1
	for i >= 0 {
		a := arrivals[i]
		if a.distance > 0 {
			matches = append(matches, Match{
				Length:   int(a.length),
				Distance: int(a.distance),
			})
		} else {
			// A literal run belongs to the match that follows it, which is the
			// one appended most recently. Trailing literals at the end of the
			// block get an otherwise empty Match of their own.
			if len(matches) == 0 {
				matches = append(matches, Match{})
			}
			matches[len(matches)-1].Unmatched = int(a.length)
		}
		i -= int(a.length)
	}
	q.matches = matches

	// The walk produced the matches last-to-first.
	slices.Reverse(matches)

	return append(dst, matches...)
}