mattermost-community-enterp.../vendor/github.com/mholt/archives/formats.go
Claude ec1f89217a Merge: Complete Mattermost Server with Community Enterprise
Full Mattermost server source with integrated Community Enterprise features.
Includes vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online):
  go build ./cmd/mattermost

Build (offline/air-gapped):
  go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 23:59:07 +09:00

441 lines
14 KiB
Go

package archives
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"path"
"path/filepath"
"strings"
)
// RegisterFormat adds format to the package-level registry. The registry
// key is the format's extension, lower-cased and with any leading or
// trailing dots trimmed. It is meant to be called from init functions.
//
// RegisterFormat panics if a format is already registered under the
// same key.
func RegisterFormat(format Format) {
	key := strings.ToLower(format.Extension())
	key = strings.Trim(key, ".")

	if _, taken := formats[key]; taken {
		panic("format " + key + " is already registered")
	}
	formats[key] = format
}
// Identify walks the registered formats and returns the one matching the
// given filename and/or stream. It recognizes compressed files (.gz,
// .xz...), archive files (.tar, .zip...), and compressed archive files
// (tar.gz, tar.bz2...). Type-assert the returned Format to discover what
// it is capable of.
//
// When nothing matches, the special error NoMatch is returned.
//
// A nil stream restricts matching to the file name, and the returned
// io.Reader is nil as well.
//
// A non-nil stream is handed back at the read position it had before the
// call: peeked bytes are buffered and replayed. If the stream is an
// io.Seeker, no buffering takes place; Seek() must work, and the original
// value is returned after seeking back to its original position.
func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) {
	// only the last path element matters, whichever separator was used
	filename = path.Base(filepath.ToSlash(filename))

	rr, err := newRewindReader(stream)
	if err != nil {
		return nil, nil, err
	}

	// first pass: compression, because it is the outer "layer" when a
	// compressed archive is involved
	var compression Compression
	for name, candidate := range formats {
		asCompression, ok := candidate.(Compression)
		if !ok {
			continue
		}
		result, matchErr := identifyOne(ctx, candidate, filename, rr, nil)
		if matchErr != nil {
			return nil, rr.reader(), fmt.Errorf("matching %s: %w", name, matchErr)
		}
		if result.Matched() {
			// remember it so the second pass can look inside the
			// decompressed stream for an archive
			compression = asCompression
			break
		}
	}

	// second pass: archival and extraction formats, matched through the
	// compression found above (if any)
	var (
		archival   Archival
		extraction Extraction
	)
	for name, candidate := range formats {
		asArchival, canArchive := candidate.(Archival)
		asExtraction, canExtract := candidate.(Extraction)
		if !canArchive && !canExtract {
			continue
		}
		result, matchErr := identifyOne(ctx, candidate, filename, rr, compression)
		if matchErr != nil {
			return nil, rr.reader(), fmt.Errorf("matching %s: %w", name, matchErr)
		}
		if result.Matched() {
			archival, extraction = asArchival, asExtraction
			break
		}
	}

	// identifyOne rewinds after every attempt, so the stream is back at
	// its starting point; now pick the most specific kind of match
	out := rr.reader()

	if compression == nil {
		switch {
		case archival != nil:
			// covers archival-only as well as archival+extraction; the
			// two are assigned from the same format, so when both are
			// set they are the same value
			return archival, out, nil
		case extraction != nil:
			return extraction, out, nil
		}
		return nil, out, NoMatch
	}

	if extraction != nil {
		// in practice this is only hit for compressed tar files, and tar
		// can both read and write, so the archival value should work
		// too; bear in mind that Identify() is meant for existing files
		// being read, not new files being written
		return CompressedArchive{archival, extraction, compression}, out, nil
	}
	if archival == nil {
		return compression, out, nil
	}

	// compression combined with a format that can archive but not
	// extract is not reported as a match
	return nil, out, NoMatch
}
// identifyOne asks a single format whether it matches filename and/or
// stream, and rewinds the stream before returning so that the next
// attempt starts from the same position. A filename of "." is treated as
// no filename at all. When comp is non-nil (and there is a stream), the
// format is matched against the decompressed contents instead of the raw
// stream. An io.EOF from the match is not reported as an error.
func identifyOne(ctx context.Context, format Format, filename string, stream *rewindReader, comp Compression) (mr MatchResult, err error) {
	defer stream.rewind()

	if filename == "." {
		filename = ""
	}

	switch {
	case stream == nil:
		// hand over a true nil io.Reader rather than a *rewindReader(nil)
		// wrapped in a non-nil interface value
		mr, err = format.Match(ctx, filename, nil)

	case comp == nil:
		mr, err = format.Match(ctx, filename, stream)

	default:
		// Looking inside a compressed format: match the "inner" format
		// through a decompressing reader. A fresh one is opened for
		// every attempt, because the stream gets reset/seeked each time
		// and a reused decompressor would be left with stale state.
		inner, openErr := comp.OpenReader(stream)
		if openErr != nil {
			return MatchResult{}, openErr
		}
		defer inner.Close()
		mr, err = format.Match(ctx, filename, inner)
	}

	// EOF only means the input was small; it is not a failure
	if errors.Is(err, io.EOF) {
		err = nil
	}
	return mr, err
}
// readAtMost reads up to n bytes from stream and returns them. Running
// out of input early is not a failure: a nil stream, a non-positive n, an
// empty stream, or a stream shorter than n all yield a (possibly empty)
// slice and a nil error. Any other read error is returned with a nil
// slice.
func readAtMost(stream io.Reader, n int) ([]byte, error) {
	if n <= 0 || stream == nil {
		return []byte{}, nil
	}

	chunk := make([]byte, n)
	got, err := io.ReadFull(stream, chunk)

	switch {
	case err == nil,
		// EOF: nothing could be read; UnexpectedEOF: fewer than n bytes
		// were available. Neither matters, since reading the full n
		// bytes is not required.
		errors.Is(err, io.EOF),
		errors.Is(err, io.ErrUnexpectedEOF):
		return chunk[:got], nil
	default:
		return nil, err
	}
}
// CompressedArchive represents an archive which is compressed externally
// (for example, a gzipped tar file, .tar.gz). It layers a compression
// format on top of an archival/extraction format and exposes both through
// a single type, so that archiving and extracting go transparently
// through compression and decompression. Identify returns this type when
// it matches both a compression format and an extraction format.
//
// Compressed archives have some limitations; for example, files cannot be
// inserted/appended because of complexities with modifying existing
// compression state (perhaps this could be overcome, but I'm not about to
// try it).
//
// The order of the embedded fields must not change: Identify builds this
// struct with an unkeyed (positional) composite literal.
type CompressedArchive struct {
	// Archival is the format used to write the archive; the methods of
	// this type that use it check for nil first.
	Archival
	// Extraction is the format used to read the archive; Extract returns
	// an error when it is nil.
	Extraction
	// Compression is the outer layer wrapped around the archive.
	Compression
}
// Extension returns the extension of the archive format followed by the
// extension of the compression format. The archive part comes from
// Archival when it is set, otherwise from Extraction. Any layer that is
// nil contributes nothing, so a zero-value CompressedArchive yields "".
func (ca CompressedArchive) Extension() string {
	var name string
	if ca.Archival != nil {
		name += ca.Archival.Extension()
	} else if ca.Extraction != nil {
		name += ca.Extraction.Extension()
	}
	// Compression is optional everywhere else in this type (Match,
	// Archive, Extract all check it for nil), so it must not be
	// dereferenced unconditionally here either.
	if ca.Compression != nil {
		name += ca.Compression.Extension()
	}
	return name
}
// MediaType returns the compression format's MIME type, since
// a compressed archive is fundamentally a compressed file.
// It returns "" when no compression format is set.
func (ca CompressedArchive) MediaType() string {
	// The other methods of this type accept a nil Compression; calling
	// through the nil embedded interface here would panic instead.
	if ca.Compression == nil {
		return ""
	}
	return ca.Compression.MediaType()
}
// Match reports a match only if the input matches the compression format
// (when one is set) and the archival format (when one is set). The first
// layer that fails to match ends the check and its own, unmatched result
// is returned. Otherwise the returned result is the union of the layers'
// results: ByName / ByStream are true if either layer matched that way.
//
// NOTE(review): the same stream is handed first to Compression.Match and
// then to Compression.OpenReader without being rewound in between;
// presumably callers are expected to supply a stream for which that
// works — confirm against the format implementations.
func (ca CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
	var combined MatchResult

	if ca.Compression != nil {
		outer, err := ca.Compression.Match(ctx, filename, stream)
		if err != nil {
			return MatchResult{}, err
		}
		if !outer.Matched() {
			return outer, nil
		}

		// from here on read through the decompressor, so the archive
		// format sees the decompressed bytes when matching by stream
		decompressed, err := ca.Compression.OpenReader(stream)
		if err != nil {
			return outer, err
		}
		defer decompressed.Close()

		stream = decompressed
		combined = outer
	}

	if ca.Archival == nil {
		return combined, nil
	}

	inner, err := ca.Archival.Match(ctx, filename, stream)
	if err != nil {
		return MatchResult{}, err
	}
	if !inner.Matched() {
		return inner, nil
	}

	combined = MatchResult{
		ByName:   combined.ByName || inner.ByName,
		ByStream: combined.ByStream || inner.ByStream,
	}
	return combined, nil
}
// Archive writes files as an archive to output, compressing the result
// when a compression format is set.
//
// It returns an error if no archival format is set, if the compression
// writer cannot be opened, if the archival format fails, or — when
// everything else succeeded — if closing the compression writer fails.
func (ca CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) (err error) {
	if ca.Archival == nil {
		return fmt.Errorf("no archival format")
	}
	if ca.Compression != nil {
		wc, openErr := ca.Compression.OpenWriter(output)
		if openErr != nil {
			return openErr
		}
		// Closing a compression writer is typically what flushes the
		// remaining data and trailer, so a failure here means the output
		// is incomplete and must not be swallowed. An earlier error from
		// the archival step takes precedence.
		defer func() {
			if closeErr := wc.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("closing compression writer: %w", closeErr)
			}
		}()
		output = wc
	}
	return ca.Archival.Archive(ctx, output, files)
}
// ArchiveAsync adds the files received on jobs to the output archive,
// compressing the result when a compression format is set.
//
// It returns an error if no archival format is set, if the archival
// format does not implement ArchiverAsync, if the compression writer
// cannot be opened, if the asynchronous archiving fails, or — when
// everything else succeeded — if closing the compression writer fails.
func (ca CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) (err error) {
	if ca.Archival == nil {
		return fmt.Errorf("no archival format")
	}
	do, ok := ca.Archival.(ArchiverAsync)
	if !ok {
		return fmt.Errorf("%T archive does not support async writing", ca.Archival)
	}
	if ca.Compression != nil {
		wc, openErr := ca.Compression.OpenWriter(output)
		if openErr != nil {
			return openErr
		}
		// Closing a compression writer is typically what flushes the
		// remaining data and trailer, so a failure here means the output
		// is incomplete and must not be swallowed. An earlier error from
		// the archival step takes precedence.
		defer func() {
			if closeErr := wc.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("closing compression writer: %w", closeErr)
			}
		}()
		output = wc
	}
	return do.ArchiveAsync(ctx, output, jobs)
}
// Extract reads the files out of sourceArchive and passes them to
// handleFile, decompressing the input first when a compression format is
// set. It returns an error if no extraction format is set or if the
// decompressing reader cannot be opened; otherwise it returns whatever
// the extraction format returns.
func (ca CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
	if ca.Extraction == nil {
		return fmt.Errorf("no extraction format")
	}

	// no compression layer: the source is the archive itself
	if ca.Compression == nil {
		return ca.Extraction.Extract(ctx, sourceArchive, handleFile)
	}

	decompressed, err := ca.Compression.OpenReader(sourceArchive)
	if err != nil {
		return err
	}
	defer decompressed.Close()

	return ca.Extraction.Extract(ctx, decompressed, handleFile)
}
// MatchResult describes how a format matched an input: by name, by
// stream, by both, or not at all. Name usually refers to matching by
// file extension, and stream usually refers to reading the first few
// bytes of the stream (its header). A stream match is generally the
// stronger of the two, as filenames are not always indicative of their
// contents, if they exist at all.
type MatchResult struct {
	// ByName is true if the match was made on the file name.
	ByName bool
	// ByStream is true if the match was made on the stream contents.
	ByStream bool
}

// Matched reports whether a match was made by name, by stream, or both.
func (res MatchResult) Matched() bool {
	if res.ByName {
		return true
	}
	return res.ByStream
}

// String renders the result in the form "{ByName=<bool> ByStream=<bool>}".
func (res MatchResult) String() string {
	byName, byStream := res.ByName, res.ByStream
	return fmt.Sprintf("{ByName=%v ByStream=%v}", byName, byStream)
}
// rewindReader is a Reader that can be rewound (reset) to re-read what
// was already read and then continue to read more from the underlying
// stream. When no more rewinding is necessary, call reader() to get a
// reader that first yields the buffered bytes and then continues with
// the stream. This is useful for "peeking" a stream an arbitrary number
// of bytes. Loosely based on the Connection type from
// https://github.com/mholt/caddy-l4.
//
// If the underlying reader is also an io.Seeker, no buffer is used, and
// instead the stream seeks back to the starting position. Should that
// seek fail, there is nothing to replay from; the failure is recorded
// and reported by subsequent calls to Read.
type rewindReader struct {
	io.Reader

	// start is the offset the stream had when the rewindReader was
	// created; only meaningful for seekers.
	start int64

	// buf accumulates everything read from the stream so far; it is
	// only allocated when the stream is not a seeker.
	buf *bytes.Buffer

	// bufReader is non-nil while previously buffered bytes are being
	// replayed after a rewind.
	bufReader io.Reader

	// seekErr is set when seeking back to start failed and there is no
	// buffer to fall back on. While set, Read returns it.
	seekErr error
}

// newRewindReader wraps r so that it can be rewound. A nil r yields a nil
// *rewindReader and no error. For seekers the current position is looked
// up and remembered, and an error is returned if that seek fails;
// all other readers get a buffer that records what is read.
func newRewindReader(r io.Reader) (*rewindReader, error) {
	if r == nil {
		return nil, nil
	}

	rr := &rewindReader{Reader: r}

	seeker, ok := r.(io.Seeker)
	if !ok {
		rr.buf = new(bytes.Buffer)
		return rr, nil
	}

	// avoid buffering if we have a seeker we can use
	start, err := seeker.Seek(0, io.SeekCurrent)
	if err != nil {
		return nil, fmt.Errorf("seek to determine current position: %w", err)
	}
	rr.start = start
	return rr, nil
}

// Read implements io.Reader. After a rewind it first replays the
// buffered bytes and then continues with the underlying stream,
// recording everything newly read (when buffering). It panics on a nil
// receiver, and returns the recorded seek error if an earlier rewind of
// a seeker failed.
func (rr *rewindReader) Read(p []byte) (n int, err error) {
	if rr == nil {
		panic("reading from nil rewindReader")
	}
	if rr.seekErr != nil {
		// the stream position is unknown; reading on would yield bytes
		// from the wrong offset
		return 0, rr.seekErr
	}

	// if there is a buffer we should read from, start
	// with that; we only read from the underlying stream
	// after the buffer has been "depleted"
	if rr.bufReader != nil {
		n, err = rr.bufReader.Read(p)
		if err == io.EOF {
			rr.bufReader = nil
			err = nil
		}
		if n == len(p) {
			return n, err
		}
	}

	// buffer has been depleted or we are not using one,
	// so read from underlying stream
	nr, readErr := rr.Reader.Read(p[n:])

	// anything that was read needs to be recorded in the buffer (if
	// used), even if there was an error; bytes.Buffer.Write always
	// returns a nil error, so there is nothing to check
	if nr > 0 && rr.buf != nil {
		_, _ = rr.buf.Write(p[n : n+nr])
	}

	// n bytes came from the buffer and nr from the stream
	return n + nr, readErr
}

// rewind resets the stream to the beginning by causing Read() to start
// reading from the beginning of the stream, or, if buffering, from the
// buffered bytes. It is a no-op on a nil receiver. If the stream is a
// seeker and seeking back fails, the error is recorded and returned by
// the next Read.
func (rr *rewindReader) rewind() {
	if rr == nil {
		return
	}
	if seeker, ok := rr.Reader.(io.Seeker); ok {
		_, err := seeker.Seek(rr.start, io.SeekStart)
		if err == nil {
			rr.seekErr = nil
			return
		}
		if rr.buf == nil {
			// seekers are never buffered, so there is nothing to replay
			rr.seekErr = fmt.Errorf("seek to rewind stream: %w", err)
			return
		}
	}
	rr.bufReader = bytes.NewReader(rr.buf.Bytes())
}

// reader returns a reader that reads first from the buffered bytes (if
// buffering), then from the underlying stream; if the stream is a
// seeker, it is seeked back to the start and returned directly. After
// calling this, no more rewinding is allowed since reads from the stream
// are no longer recorded, so rewinding properly is impossible. A nil
// receiver yields a nil io.Reader. If seeking back fails and nothing was
// buffered, the returned reader reports that error from Read.
func (rr *rewindReader) reader() io.Reader {
	if rr == nil {
		return nil
	}
	if seeker, ok := rr.Reader.(io.Seeker); ok {
		_, err := seeker.Seek(rr.start, io.SeekStart)
		if err == nil {
			return rr.Reader
		}
		if rr.buf == nil {
			// no buffered copy exists for seekers; hand back a reader
			// that surfaces the failure rather than misplaced bytes
			rr.seekErr = fmt.Errorf("seek to rewind stream: %w", err)
			return rr
		}
	}
	return io.MultiReader(bytes.NewReader(rr.buf.Bytes()), rr.Reader)
}
// NoMatch is the special error Identify returns when none of the
// registered formats match the input.
var NoMatch = fmt.Errorf("no formats matched")

// formats holds the registered formats. RegisterFormat fills it, keyed
// by the lower-cased extension without surrounding dots, and Identify
// ranges over it.
var formats = make(map[string]Format)

// Interface guards: compile-time checks that *CompressedArchive
// satisfies each of the listed interfaces.
var (
	_ Format        = (*CompressedArchive)(nil)
	_ Archiver      = (*CompressedArchive)(nil)
	_ ArchiverAsync = (*CompressedArchive)(nil)
	_ Extractor     = (*CompressedArchive)(nil)
	_ Compressor    = (*CompressedArchive)(nil)
	_ Decompressor  = (*CompressedArchive)(nil)
)