mattermost-community-enterp.../vendor/github.com/mholt/archives/tar.go

package archives

import (
	"archive/tar"
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log"
	"strings"
)
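
// init registers the Tar format with this package's format registry
// so tar archives can be recognized and handled by the library.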
func init() {
	RegisterFormat(Tar{})
}
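
// Tar can create, insert into, and extract .tar archives.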
type Tar struct {
	// Specify the tar format to use when writing headers.
	// The default is whichever format is capable of encoding
	// the header being written, from this ordered list:
	// USTAR, PAX, GNU.
	Format tar.Format

	// Deprecated: Use [Tar.Format] instead.
	FormatGNU bool

	// If true, only the numeric user and group IDs are preserved;
	// Uname and Gname are omitted from written headers.
	NumericUIDGID bool

	// If true, errors encountered during reading or writing
	// a file within an archive will be logged and the
	// operation will continue on remaining files.
	ContinueOnError bool

	// If non-zero, overrides the user ID of the file owner in written headers.
	Uid int

	// If non-zero, overrides the group ID of the file owner in written headers.
	Gid int

	// If non-empty, overrides the username of the file owner in written headers.
	Uname string

	// If non-empty, overrides the group name of the file owner in written headers.
	Gname string
}
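
// Extension returns the conventional file extension for tar archives.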
func (Tar) Extension() string { return ".tar" }
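
// MediaType returns the MIME type used for tar archives.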
func (Tar) MediaType() string { return "application/x-tar" }
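
// Match reports whether the given filename and/or stream look like a tar
// archive: the filename matches if it contains the .tar extension, and the
// stream matches if an initial tar header can be read from it.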
func (t Tar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
	var mr MatchResult

	// match filename
	if strings.Contains(strings.ToLower(filename), t.Extension()) {
		mr.ByName = true
	}

	// match file header
	if stream != nil {
		r := tar.NewReader(stream)
		_, err := r.Next()
		mr.ByStream = err == nil
	}

	return mr, nil
}
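
// Archive writes the given files to output as a tar archive. If
// ContinueOnError is true, errors writing individual files are logged and
// the remaining files are still written; context cancellation always aborts.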
func (t Tar) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
	tw := tar.NewWriter(output)
	defer tw.Close()

	for _, file := range files {
		if err := t.writeFileToArchive(ctx, tw, file); err != nil {
			if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort
				log.Printf("[ERROR] %v", err)
				continue
			}
			return err
		}
	}

	return nil
}
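
// ArchiveAsync writes files to output as they arrive on the jobs channel,
// sending each file's result back on that job's Result channel. The loop
// runs until the jobs channel is closed; it is up to the caller to stop
// sending jobs after an error if desired.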
func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
	tw := tar.NewWriter(output)
	defer tw.Close()

	for job := range jobs {
		job.Result <- t.writeFileToArchive(ctx, tw, job.File)
	}

	return nil
}
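
// writeFileToArchive writes a single file's header (and, for regular files,
// its contents) to the tar writer, applying any format and owner overrides
// configured on t. It honors context cancellation before doing any work.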
func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file FileInfo) error {
	if err := ctx.Err(); err != nil {
		return err // honor context cancellation
	}

	hdr, err := tar.FileInfoHeader(file, file.LinkTarget)
	if err != nil {
		return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err)
	}
	hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name
	if hdr.Name == "" {
		hdr.Name = file.Name() // fall back to the base name of the file
	}

	// TODO: FormatGNU is deprecated; remove soon
	if t.FormatGNU {
		hdr.Format = tar.FormatGNU
	}
	if t.Format != 0 {
		hdr.Format = t.Format
	}
	if t.NumericUIDGID {
		hdr.Uname = ""
		hdr.Gname = ""
	}
	if t.Uid != 0 {
		hdr.Uid = t.Uid
	}
	if t.Gid != 0 {
		hdr.Gid = t.Gid
	}
	if t.Uname != "" {
		hdr.Uname = t.Uname
	}
	if t.Gname != "" {
		hdr.Gname = t.Gname
	}

	if err := tw.WriteHeader(hdr); err != nil {
		return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err)
	}

	// only proceed to write a file body if there is actually a body
	// (for example, directories and links don't have a body)
	if hdr.Typeflag != tar.TypeReg {
		return nil
	}

	if err := openAndCopyFile(file, tw); err != nil {
		return fmt.Errorf("file %s: writing data: %w", file.NameInArchive, err)
	}

	return nil
}
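
// Insert appends the given files to the end of the existing tar archive in
// into. It first scans the archive to locate where the end-of-archive
// padding begins, seeks there, and then writes the new entries; see the
// comments below for why the end of the archive has to be found this way.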
func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileInfo) error {
	// Tar files may end with some, none, or a lot of zero-byte padding. The spec says
	// it should end with two 512-byte trailer records consisting solely of null/0
	// bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However,
	// in my experiments using the `tar` command, I've found that is not the case,
	// and Colin Percival (author of tarsnap) confirmed this:
	// - https://twitter.com/cperciva/status/1476774314623913987
	// - https://twitter.com/cperciva/status/1476776999758663680
	// So while this solution on Stack Overflow makes sense if you control the
	// writer: https://stackoverflow.com/a/18330903/1048862 - and I did get it
	// to work in that case -- it is not a general solution. Seems that the only
	// reliable thing to do is scan the entire archive to find the last file,
	// read its size, then use that to compute the end of content and thus the
	// true length of end-of-archive padding. This is slightly more complex than
	// just adding the size of the last file to the current stream/seek position,
	// because we have to align to 512-byte blocks precisely. I don't actually
	// fully know why this works, but in my testing on a few different files it
	// did work, whereas other solutions only worked on 1 specific file. *shrug*
	//
	// Another option is to scan the file for the last contiguous series of 0s,
	// without interpreting the tar format at all, and to find the nearest
	// blocksize-offset and start writing there. Problem is that you wouldn't
	// know if you just overwrote some of the last file if it ends with all 0s.
	// Sigh.
	var lastFileSize, lastStreamPos int64
	tr := tar.NewReader(into)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		lastStreamPos, err = into.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}
		lastFileSize = hdr.Size
	}

	// we can now compute the precise location to write the new file to (I think)
	const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package)
	newOffset := lastStreamPos + lastFileSize
	newOffset += blockSize - (newOffset % blockSize) // shift to next-nearest block boundary
	_, err := into.Seek(newOffset, io.SeekStart)
	if err != nil {
		return err
	}

	tw := tar.NewWriter(into)
	defer tw.Close()

	for i, file := range files {
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}
		err = t.writeFileToArchive(ctx, tw, file)
		if err != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				log.Printf("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), err)
				continue
			}
			return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), err)
		}
	}

	return nil
}
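
// Extract reads entries from sourceArchive and calls handleFile for each
// one. Returning fs.SkipAll from the handler stops the walk, and returning
// fs.SkipDir for a directory skips the remaining entries under that
// directory. The fs.File opened from a FileInfo reads directly from the
// archive stream, so its contents must be consumed inside the handler,
// before the next entry is read.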
func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
	tr := tar.NewReader(sourceArchive)

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for {
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}

		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				log.Printf("[ERROR] Advancing to next file in tar archive: %v", err)
				continue
			}
			return err
		}
		if fileIsIncluded(skipDirs, hdr.Name) {
			continue
		}
		if hdr.Typeflag == tar.TypeXGlobalHeader {
			// ignore the pax global header from git-generated tarballs
			continue
		}

		info := hdr.FileInfo()
		file := FileInfo{
			FileInfo:      info,
			Header:        hdr,
			NameInArchive: hdr.Name,
			LinkTarget:    hdr.Linkname,
			Open: func() (fs.File, error) {
				return fileInArchive{io.NopCloser(tr), info}, nil
			},
		}

		err = handleFile(ctx, file)
		if errors.Is(err, fs.SkipAll) {
			// At first, I wasn't sure if fs.SkipAll implied that the rest of the entries
			// should still be iterated and just "skipped" (i.e. no-ops) or if the walk
			// should stop; both have the same net effect, one is just less efficient...
			// apparently the name of fs.StopWalk was the preferred name, but it still
			// became fs.SkipAll because of semantics with documentation; see
			// https://github.com/golang/go/issues/47209 -- anyway, the walk should stop.
			break
		} else if errors.Is(err, fs.SkipDir) && file.IsDir() {
			skipDirs.add(hdr.Name)
		} else if err != nil {
			return fmt.Errorf("handling file: %s: %w", hdr.Name, err)
		}
	}

	return nil
}

// Interface guards
var (
	_ Archiver      = (*Tar)(nil)
	_ ArchiverAsync = (*Tar)(nil)
	_ Extractor     = (*Tar)(nil)
	_ Inserter      = (*Tar)(nil)
)