mattermost-community-enterp.../vendor/github.com/minio/minlz/index.go
// Copyright 2025 MinIO Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package minlz

import (
	"bytes"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"sort"
)

const (
	// IndexHeader is the identifying prefix written at the start of the
	// serialized index payload.
	IndexHeader = "s2idx\x00"
	// IndexTrailer is IndexHeader reversed. It is written at the very end of
	// the serialized index so the index can be located from the end of a stream.
	IndexTrailer = "\x00xdi2s"

	maxIndexEntries = 1 << 16
	minIndexDist    = 1 << 20 // Minimum uncompressed distance between entries.
)

// Index represents an S2/Snappy/MinLZ index.
type Index struct {
	// Total Uncompressed size.
	TotalUncompressed int64
	// Total Compressed size if known. Will be -1 if unknown.
	TotalCompressed int64
	// Offsets are pairs of Compressed -> Uncompressed positions.
	// Offsets are stream offsets from the first stream byte.
	// It is safe to start decompressing from any of these offsets.
	// The slice is sorted by offset.
	Offsets []OffsetPair

	estBlockUncomp int64
}

// OffsetPair is a single seek point: a compressed stream offset and the
// uncompressed offset it corresponds to.
type OffsetPair struct {
	CompressedOffset   int64
	UncompressedOffset int64
}

func (i *Index) reset(maxBlock int) {
	if i == nil {
		return
	}
	for maxBlock < minIndexDist {
		maxBlock *= 2
	}
	i.estBlockUncomp = int64(maxBlock)
	i.TotalCompressed = -1
	i.TotalUncompressed = -1
	if len(i.Offsets) > 0 {
		i.Offsets = i.Offsets[:0]
	}
}

// allocInfos will allocate an empty slice of infos.
func (i *Index) allocInfos(n int) {
	if n > maxIndexEntries {
		panic("n > maxIndexEntries")
	}
	i.Offsets = make([]OffsetPair, 0, n)
}

// add an uncompressed and compressed pair.
// Entries must be sent in order.
func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
	if i == nil {
		return nil
	}
	lastIdx := len(i.Offsets) - 1
	if lastIdx >= 0 {
		latest := i.Offsets[lastIdx]
		if uncompressedOffset-latest.UncompressedOffset < i.estBlockUncomp {
			// Don't add until we have i.estBlockUncomp
			return nil
		}
		if latest.UncompressedOffset > uncompressedOffset {
			return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.UncompressedOffset, uncompressedOffset)
		}
		if latest.CompressedOffset > compressedOffset {
			return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.CompressedOffset, compressedOffset)
		}
	}
	i.Offsets = append(i.Offsets, OffsetPair{CompressedOffset: compressedOffset, UncompressedOffset: uncompressedOffset})
	if len(i.Offsets) > maxIndexEntries {
		// Keep memory from exploding.
		i.reduceLight()
	}
	return nil
}

// Find the offset at or before the wanted (uncompressed) offset.
// If offset is 0 or positive it is the offset from the beginning of the file.
// If the uncompressed size is known, the offset must be within the file.
// If an offset outside the file is requested, io.ErrUnexpectedEOF is returned.
// If the offset is negative, it is interpreted as the distance from the end of the file,
// where -1 represents the last byte.
// If an offset from the end of the file is requested, but the size is unknown,
// ErrUnsupported will be returned.
func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
	if i.TotalUncompressed < 0 {
		return 0, 0, ErrCorrupt
	}
	if offset < 0 {
		offset = i.TotalUncompressed + offset
		if offset < 0 {
			return 0, 0, io.ErrUnexpectedEOF
		}
	}
	if offset > i.TotalUncompressed {
		return 0, 0, io.ErrUnexpectedEOF
	}
	if len(i.Offsets) > 200 {
		// Binary search for larger indexes.
		n := sort.Search(len(i.Offsets), func(n int) bool {
			return i.Offsets[n].UncompressedOffset > offset
		})
		if n == 0 {
			n = 1
		}
		return i.Offsets[n-1].CompressedOffset, i.Offsets[n-1].UncompressedOffset, nil
	}
	// Linear scan for small indexes.
	for _, info := range i.Offsets {
		if info.UncompressedOffset > offset {
			break
		}
		compressedOff = info.CompressedOffset
		uncompressedOff = info.UncompressedOffset
	}
	return compressedOff, uncompressedOff, nil
}
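
// findSeekExample is a hypothetical helper (an illustrative sketch, not part
// of the package API) showing how Find's result is typically used: seek the
// compressed stream to the returned compressed offset, then decompress and
// discard bytes until the wanted uncompressed offset is reached.
func findSeekExample(idx *Index, rs io.ReadSeeker, want int64) (discard int64, err error) {
	compOff, uncompOff, err := idx.Find(want)
	if err != nil {
		return 0, err
	}
	// Position the compressed stream on a seek point at or before `want`.
	if _, err := rs.Seek(compOff, io.SeekStart); err != nil {
		return 0, err
	}
	// The caller must decompress and throw away this many uncompressed bytes
	// to land exactly on `want`.
	return want - uncompOff, nil
}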

// reduce to stay below maxIndexEntries
func (i *Index) reduce() {
	if len(i.Offsets) < maxIndexEntries {
		return
	}

	// Algorithm, keep 1, remove removeN entries...
	removeN := (len(i.Offsets) + 1) / maxIndexEntries
	src := i.Offsets
	j := 0

	// Each block should be at least 1MB, but don't reduce below 1000 entries.
	for i.estBlockUncomp*(int64(removeN)+1) < minIndexDist && len(i.Offsets)/(removeN+1) > 1000 {
		removeN++
	}
	for idx := 0; idx < len(src); idx++ {
		i.Offsets[j] = src[idx]
		j++
		idx += removeN
	}
	i.Offsets = i.Offsets[:j]
	// Update maxblock estimate.
	i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
}

// reduceLight reduces the number of entries by doubling the block distance
// estimate, keeping the index below maxIndexEntries.
func (i *Index) reduceLight() {
	i.estBlockUncomp *= 2
	src := i.Offsets
	var j int
	for idx := 0; idx < len(src); idx++ {
		base := src[idx]
		i.Offsets[j] = base
		j++
		for idx < len(src) && src[idx].UncompressedOffset-base.UncompressedOffset < i.estBlockUncomp {
			idx++
		}
	}
	i.Offsets = i.Offsets[:j]
}

// appendTo appends the serialized index to b, including the chunk header and
// trailer, and returns the updated slice.
func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte {
	if i == nil {
		return nil
	}
	i.reduce()
	var tmp [binary.MaxVarintLen64]byte

	initSize := len(b)
	// We make the start a skippable header+size.
	b = append(b, chunkTypeIndex, 0, 0, 0)
	b = append(b, []byte(IndexHeader)...)
	// Total Uncompressed size
	n := binary.PutVarint(tmp[:], uncompTotal)
	b = append(b, tmp[:n]...)
	// Total Compressed size
	n = binary.PutVarint(tmp[:], compTotal)
	b = append(b, tmp[:n]...)
	// Put EstBlockUncomp size
	n = binary.PutVarint(tmp[:], i.estBlockUncomp)
	b = append(b, tmp[:n]...)
	// Put length
	n = binary.PutVarint(tmp[:], int64(len(i.Offsets)))
	b = append(b, tmp[:n]...)

	// Check if we should add uncompressed offsets
	var hasUncompressed byte
	for idx, info := range i.Offsets {
		if idx == 0 {
			if info.UncompressedOffset != 0 {
				hasUncompressed = 1
				break
			}
			continue
		}
		if info.UncompressedOffset != i.Offsets[idx-1].UncompressedOffset+i.estBlockUncomp {
			hasUncompressed = 1
			break
		}
	}
	b = append(b, hasUncompressed)

	// Add each entry
	if hasUncompressed == 1 {
		for idx, info := range i.Offsets {
			uOff := info.UncompressedOffset
			if idx > 0 {
				prev := i.Offsets[idx-1]
				uOff -= prev.UncompressedOffset + (i.estBlockUncomp)
			}
			n = binary.PutVarint(tmp[:], uOff)
			b = append(b, tmp[:n]...)
		}
	}

	// Initial compressed size estimate.
	cPredict := i.estBlockUncomp / 2

	for idx, info := range i.Offsets {
		cOff := info.CompressedOffset
		if idx > 0 {
			prev := i.Offsets[idx-1]
			cOff -= prev.CompressedOffset + cPredict
			// Update compressed size prediction, with half the error.
			cPredict += cOff / 2
		}
		b = binary.AppendVarint(b, cOff)
	}

	// Add Total Size.
	// Stored as fixed size for easier reading.
	binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(IndexTrailer)))
	b = append(b, tmp[:4]...)
	// Trailer
	b = append(b, []byte(IndexTrailer)...)

	// Update size
	chunkLen := len(b) - initSize - skippableFrameHeader
	b[initSize+1] = uint8(chunkLen >> 0)
	b[initSize+2] = uint8(chunkLen >> 8)
	b[initSize+3] = uint8(chunkLen >> 16)
	//fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
	return b
}
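
// indexRoundTripSketch is a hypothetical helper (illustrative only) showing
// that the chunk produced by appendTo can be parsed back with Load below and
// that the stored totals survive the round trip.
func indexRoundTripSketch(idx *Index, uncompTotal, compTotal int64) error {
	serialized := idx.appendTo(nil, uncompTotal, compTotal)
	var loaded Index
	if _, err := loaded.Load(serialized); err != nil {
		return err
	}
	if loaded.TotalUncompressed != uncompTotal || loaded.TotalCompressed != compTotal {
		return fmt.Errorf("index round trip mismatch: %d/%d vs %d/%d",
			loaded.TotalUncompressed, loaded.TotalCompressed, uncompTotal, compTotal)
	}
	return nil
}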

// Load a binary index.
// A zero value Index can be used or a previous one can be reused.
func (i *Index) Load(b []byte) ([]byte, error) {
	if len(b) <= 4+len(IndexHeader)+len(IndexTrailer) {
		return b, io.ErrUnexpectedEOF
	}
	if b[0] != chunkTypeIndex && b[0] != legacyIndexChunk {
		return b, ErrCorrupt
	}
	chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
	b = b[4:]

	// Validate we have enough...
	if len(b) < chunkLen {
		return b, io.ErrUnexpectedEOF
	}
	if !bytes.Equal(b[:len(IndexHeader)], []byte(IndexHeader)) {
		return b, ErrUnsupported
	}
	b = b[len(IndexHeader):]

	// Total Uncompressed
	if v, n := binary.Varint(b); n <= 0 || v < 0 {
		return b, ErrCorrupt
	} else {
		i.TotalUncompressed = v
		b = b[n:]
	}

	// Total Compressed
	if v, n := binary.Varint(b); n <= 0 {
		return b, ErrCorrupt
	} else {
		i.TotalCompressed = v
		b = b[n:]
	}

	// Read EstBlockUncomp
	if v, n := binary.Varint(b); n <= 0 {
		return b, ErrCorrupt
	} else {
		if v < 0 {
			return b, ErrCorrupt
		}
		i.estBlockUncomp = v
		b = b[n:]
	}

	var entries int
	if v, n := binary.Varint(b); n <= 0 {
		return b, ErrCorrupt
	} else {
		if v < 0 || v > maxIndexEntries {
			return b, ErrCorrupt
		}
		entries = int(v)
		b = b[n:]
	}
	if cap(i.Offsets) < entries {
		i.allocInfos(entries)
	}
	i.Offsets = i.Offsets[:entries]

	if len(b) < 1 {
		return b, io.ErrUnexpectedEOF
	}
	hasUncompressed := b[0]
	b = b[1:]
	if hasUncompressed&1 != hasUncompressed {
		return b, ErrCorrupt
	}

	// Add each uncompressed entry
	for idx := range i.Offsets {
		var uOff int64
		if hasUncompressed != 0 {
			// Load delta
			if v, n := binary.Varint(b); n <= 0 {
				return b, ErrCorrupt
			} else {
				uOff = v
				b = b[n:]
			}
		}

		if idx > 0 {
			prev := i.Offsets[idx-1].UncompressedOffset
			uOff += prev + (i.estBlockUncomp)
			if uOff <= prev {
				return b, ErrCorrupt
			}
		}
		if uOff < 0 {
			return b, ErrCorrupt
		}
		i.Offsets[idx].UncompressedOffset = uOff
	}

	// Initial compressed size estimate.
	cPredict := i.estBlockUncomp / 2

	// Add each compressed entry
	for idx := range i.Offsets {
		var cOff int64
		if v, n := binary.Varint(b); n <= 0 {
			return b, ErrCorrupt
		} else {
			cOff = v
			b = b[n:]
		}

		if idx > 0 {
			// Update compressed size prediction, with half the error.
			cPredictNew := cPredict + cOff/2

			prev := i.Offsets[idx-1].CompressedOffset
			cOff += prev + cPredict
			if cOff <= prev {
				return b, ErrCorrupt
			}
			cPredict = cPredictNew
		}
		if cOff < 0 {
			return b, ErrCorrupt
		}
		i.Offsets[idx].CompressedOffset = cOff
	}
	if len(b) < 4+len(IndexTrailer) {
		return b, io.ErrUnexpectedEOF
	}
	// Skip size...
	b = b[4:]

	// Check trailer...
	if !bytes.Equal(b[:len(IndexTrailer)], []byte(IndexTrailer)) {
		return b, ErrCorrupt
	}
	return b[len(IndexTrailer):], nil
}

// LoadStream will load an index from the end of the supplied stream.
// ErrUnsupported will be returned if the signature cannot be found.
// ErrCorrupt will be returned if unexpected values are found.
// io.ErrUnexpectedEOF is returned if there are too few bytes.
// IO errors are returned as-is.
func (i *Index) LoadStream(rs io.ReadSeeker) error {
	// Go to end.
	_, err := rs.Seek(-10, io.SeekEnd)
	if err != nil {
		return err
	}
	var tmp [10]byte
	_, err = io.ReadFull(rs, tmp[:])
	if err != nil {
		return err
	}
	// Check trailer...
	if !bytes.Equal(tmp[4:4+len(IndexTrailer)], []byte(IndexTrailer)) {
		return ErrUnsupported
	}
	sz := binary.LittleEndian.Uint32(tmp[:4])
	if sz > MaxUserChunkSize+skippableFrameHeader {
		return ErrCorrupt
	}
	_, err = rs.Seek(-int64(sz), io.SeekEnd)
	if err != nil {
		return err
	}

	// Read index.
	buf := make([]byte, sz)
	_, err = io.ReadFull(rs, buf)
	if err != nil {
		return err
	}
	_, err = i.Load(buf)
	return err
}
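
// loadStreamSketch is a hypothetical helper (illustrative only): given a
// complete compressed stream held in memory with an index at its end, load
// the index and resolve a seek point for the wanted uncompressed offset.
func loadStreamSketch(stream []byte, want int64) (int64, int64, error) {
	var idx Index
	// bytes.Reader implements io.ReadSeeker, so it can stand in for a file here.
	if err := idx.LoadStream(bytes.NewReader(stream)); err != nil {
		return 0, 0, err
	}
	// Find returns the indexed pair at or before `want`.
	return idx.Find(want)
}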

// IndexStream will return an index for a stream.
// The stream structure will be checked, but
// data within blocks is not verified.
// The returned index can either be appended to the end of the stream
// or stored separately.
func IndexStream(r io.Reader) ([]byte, error) {
	var i Index
	var buf [MaxUserChunkSize]byte
	var readHeader bool
	for {
		_, err := io.ReadFull(r, buf[:4])
		if err != nil {
			if err == io.EOF {
				return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
			}
			return nil, err
		}
		// Start of this chunk.
		startChunk := i.TotalCompressed
		i.TotalCompressed += 4

		chunkType := buf[0]
		if !readHeader {
			if chunkType != ChunkTypeStreamIdentifier && chunkType != chunkTypeEOF {
				return nil, ErrCorrupt
			}
			readHeader = true
		}
		chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
		if chunkLen < checksumSize {
			return nil, ErrCorrupt
		}

		i.TotalCompressed += int64(chunkLen)
		_, err = io.ReadFull(r, buf[:chunkLen])
		if err != nil {
			return nil, io.ErrUnexpectedEOF
		}

		switch chunkType {
		case chunkTypeLegacyCompressedData, chunkTypeMinLZCompressedData, chunkTypeMinLZCompressedDataCompCRC:
			// Section 4.2. Compressed data (chunk type 0x00).
			// Skip checksum.
			dLen, err := DecodedLen(buf[checksumSize:])
			if err != nil {
				return nil, err
			}
			if dLen > maxBlockSize {
				return nil, ErrCorrupt
			}
			if i.estBlockUncomp == 0 {
				// Use first block for estimate...
				i.estBlockUncomp = int64(dLen)
			}
			err = i.add(startChunk, i.TotalUncompressed)
			if err != nil {
				return nil, err
			}
			i.TotalUncompressed += int64(dLen)
			continue
		case chunkTypeUncompressedData:
			n2 := chunkLen - checksumSize
			if n2 > maxBlockSize {
				return nil, ErrCorrupt
			}
			if i.estBlockUncomp == 0 {
				// Use first block for estimate...
				i.estBlockUncomp = int64(n2)
			}
			err = i.add(startChunk, i.TotalUncompressed)
			if err != nil {
				return nil, err
			}
			i.TotalUncompressed += int64(n2)
			continue
		case ChunkTypeStreamIdentifier:
			// Section 4.1. Stream identifier (chunk type 0xff).
			if chunkLen != magicBodyLen {
				return nil, ErrCorrupt
			}
			if string(buf[:len(magicBody)]) != magicBody {
				if string(buf[:len(magicBodyS2)]) != magicBodyS2 {
					if string(buf[:magicBodyLen]) != magicBodySnappy {
						return nil, ErrCorrupt
					}
				}
			}
			continue
		case chunkTypeEOF:
			continue
		}
		if chunkType <= maxNonSkippableChunk {
			// Section 4.5. Reserved unskippable chunks (chunk types 0x03-0x3f).
			fmt.Println("UN:", chunkType)
			return nil, ErrUnsupported
		}
		// Skip user chunks and padding.
	}
}
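
// appendIndexSketch is a hypothetical helper (illustrative only): build an
// index for an existing compressed stream held in memory and append it to the
// stream, as the IndexStream documentation above permits, so LoadStream can
// later locate it at the end.
func appendIndexSketch(stream []byte) ([]byte, error) {
	idx, err := IndexStream(bytes.NewReader(stream))
	if err != nil {
		return nil, err
	}
	return append(stream, idx...), nil
}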

// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
	type offset struct {
		CompressedOffset   int64 `json:"compressed"`
		UncompressedOffset int64 `json:"uncompressed"`
	}
	x := struct {
		TotalUncompressed int64    `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
		TotalCompressed   int64    `json:"total_compressed"`   // Total Compressed size if known. Will be -1 if unknown.
		Offsets           []offset `json:"offsets"`
		EstBlockUncomp    int64    `json:"est_block_uncompressed"`
	}{
		TotalUncompressed: i.TotalUncompressed,
		TotalCompressed:   i.TotalCompressed,
		EstBlockUncomp:    i.estBlockUncomp,
	}
	for _, v := range i.Offsets {
		x.Offsets = append(x.Offsets, offset{CompressedOffset: v.CompressedOffset, UncompressedOffset: v.UncompressedOffset})
	}
	b, _ := json.MarshalIndent(x, "", " ")
	return b
}

// RemoveIndexHeaders will trim all headers and trailers from a given index.
// This is expected to save 20 bytes.
// These can be restored using RestoreIndexHeaders.
// This removes a layer of security, but is the most compact representation.
// Returns nil if the headers contain errors.
// The returned slice references the provided slice.
func RemoveIndexHeaders(b []byte) []byte {
	const save = 4 + len(IndexHeader) + len(IndexTrailer) + 4
	if len(b) <= save {
		return nil
	}
	if b[0] != chunkTypeIndex {
		return nil
	}
	chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
	b = b[4:]

	// Validate we have enough...
	if len(b) < chunkLen {
		return nil
	}
	b = b[:chunkLen]

	if !bytes.Equal(b[:len(IndexHeader)], []byte(IndexHeader)) {
		return nil
	}
	b = b[len(IndexHeader):]

	if !bytes.HasSuffix(b, []byte(IndexTrailer)) {
		return nil
	}
	b = bytes.TrimSuffix(b, []byte(IndexTrailer))

	if len(b) < 4 {
		return nil
	}
	return b[:len(b)-4]
}

// RestoreIndexHeaders will restore index headers removed by RemoveIndexHeaders.
// No error checking is performed on the input.
// If a 0 length slice is sent, it is returned without modification.
func RestoreIndexHeaders(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	b := make([]byte, 0, 4+len(IndexHeader)+len(in)+len(IndexTrailer)+4)
	b = append(b, chunkTypeIndex, 0, 0, 0)
	b = append(b, []byte(IndexHeader)...)
	b = append(b, in...)

	// Size of block as uint32
	b = binary.LittleEndian.AppendUint32(b, uint32(len(b)+4+len(IndexTrailer)))
	// Trailer
	b = append(b, []byte(IndexTrailer)...)

	chunkLen := len(b) - skippableFrameHeader
	b[1] = uint8(chunkLen >> 0)
	b[2] = uint8(chunkLen >> 8)
	b[3] = uint8(chunkLen >> 16)
	return b
}
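
// compactIndexSketch is a hypothetical helper (illustrative only): store an
// index without its framing to save the ~20 bytes mentioned above, then
// restore the framing before loading it again.
func compactIndexSketch(serializedIndex []byte) (*Index, error) {
	compact := RemoveIndexHeaders(serializedIndex)
	if compact == nil {
		return nil, ErrCorrupt
	}
	var idx Index
	if _, err := idx.Load(RestoreIndexHeaders(compact)); err != nil {
		return nil, err
	}
	return &idx, nil
}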