package archives

import (
	"archive/tar"
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log"
	"strings"
)

func init() {
	RegisterFormat(Tar{})
}
type Tar struct {
	// Specify the tar format to use when writing headers.
	// The default is whichever format is capable of encoding
	// the header being written, from this ordered list:
	// USTAR, PAX, GNU.
	Format tar.Format

	// Deprecated: Use [Tar.Format] instead.
	FormatGNU bool

	// If true, preserve only numeric user and group IDs.
	NumericUIDGID bool

	// If true, errors encountered during reading or writing
	// a file within an archive will be logged and the
	// operation will continue on remaining files.
	ContinueOnError bool

	// User ID of the file owner.
	Uid int

	// Group ID of the file owner.
	Gid int

	// Username of the file owner.
	Uname string

	// Group name of the file owner.
	Gname string
}
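
// A minimal usage sketch (hedged: "files" is a []FileInfo assembled by the
// caller, and the output path is hypothetical):
//
//	t := Tar{Format: tar.FormatPAX, NumericUIDGID: true}
//	out, err := os.Create("out.tar")
//	if err != nil {
//		return err
//	}
//	defer out.Close()
//	err = t.Archive(context.Background(), out, files)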

func (Tar) Extension() string { return ".tar" }
func (Tar) MediaType() string { return "application/x-tar" }

func (t Tar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
	var mr MatchResult

	// match filename
	if strings.Contains(strings.ToLower(filename), t.Extension()) {
		mr.ByName = true
	}

	// match file header
	if stream != nil {
		r := tar.NewReader(stream)
		_, err := r.Next()
		mr.ByStream = err == nil
	}

	return mr, nil
}
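
// A minimal matching sketch (hedged: Match consumes bytes from the stream
// while probing for a header, so a caller that reads the stream afterward
// must rewind or re-buffer it):
//
//	mr, err := Tar{}.Match(ctx, "backup.tar", f)
//	if err == nil && (mr.ByName || mr.ByStream) {
//		// treat f as a tar archive
//	}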

func (t Tar) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
	tw := tar.NewWriter(output)
	defer tw.Close()

	for _, file := range files {
		if err := t.writeFileToArchive(ctx, tw, file); err != nil {
			if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort
				log.Printf("[ERROR] %v", err)
				continue
			}
			return err
		}
	}

	return nil
}

func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
	tw := tar.NewWriter(output)
	defer tw.Close()

	for job := range jobs {
		job.Result <- t.writeFileToArchive(ctx, tw, job.File)
	}

	return nil
}
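
// A minimal caller sketch for ArchiveAsync (hedged: the channel plumbing is
// the caller's responsibility; field names follow their use in this file):
//
//	jobs := make(chan ArchiveAsyncJob)
//	go func() {
//		defer close(jobs) // closing jobs lets ArchiveAsync return
//		for _, f := range files {
//			res := make(chan error, 1)
//			jobs <- ArchiveAsyncJob{File: f, Result: res}
//			if err := <-res; err != nil {
//				// handle per-file error
//			}
//		}
//	}()
//	err := Tar{}.ArchiveAsync(ctx, out, jobs)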

func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file FileInfo) error {
	if err := ctx.Err(); err != nil {
		return err // honor context cancellation
	}

	hdr, err := tar.FileInfoHeader(file, file.LinkTarget)
	if err != nil {
		return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err)
	}
	hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name
	if hdr.Name == "" {
		hdr.Name = file.Name() // fall back to the base name of the file
	}
	// TODO: FormatGNU is deprecated; remove soon
	if t.FormatGNU {
		hdr.Format = tar.FormatGNU
	}
	if t.Format != tar.FormatUnknown {
		hdr.Format = t.Format
	}
	if t.NumericUIDGID {
		hdr.Uname = ""
		hdr.Gname = ""
	}
	if t.Uid != 0 {
		hdr.Uid = t.Uid
	}
	if t.Gid != 0 {
		hdr.Gid = t.Gid
	}
	if t.Uname != "" {
		hdr.Uname = t.Uname
	}
	if t.Gname != "" {
		hdr.Gname = t.Gname
	}

	if err := tw.WriteHeader(hdr); err != nil {
		return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err)
	}

	// only proceed to write a file body if there is actually a body
	// (for example, directories and links don't have a body)
	if hdr.Typeflag != tar.TypeReg {
		return nil
	}

	if err := openAndCopyFile(file, tw); err != nil {
		return fmt.Errorf("file %s: writing data: %w", file.NameInArchive, err)
	}

	return nil
}

func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileInfo) error {
	// Tar files may end with some, none, or a lot of zero-byte padding. The spec says
	// it should end with two 512-byte trailer records consisting solely of null/0
	// bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However,
	// in my experiments using the `tar` command, I've found that is not the case,
	// and Colin Percival (author of tarsnap) confirmed this:
	// - https://twitter.com/cperciva/status/1476774314623913987
	// - https://twitter.com/cperciva/status/1476776999758663680
	// So while this solution on Stack Overflow makes sense if you control the
	// writer: https://stackoverflow.com/a/18330903/1048862 - and I did get it
	// to work in that case -- it is not a general solution. Seems that the only
	// reliable thing to do is scan the entire archive to find the last file,
	// read its size, then use that to compute the end of content and thus the
	// true length of end-of-archive padding. This is slightly more complex than
	// just adding the size of the last file to the current stream/seek position,
	// because we have to align to 512-byte blocks precisely. I don't actually
	// fully know why this works, but in my testing on a few different files it
	// did work, whereas other solutions only worked on 1 specific file. *shrug*
	//
	// Another option is to scan the file for the last contiguous series of 0s,
	// without interpreting the tar format at all, and to find the nearest
	// blocksize-offset and start writing there. Problem is that you wouldn't
	// know if you just overwrote some of the last file if it ends with all 0s.
	// Sigh.
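	//
	// As a worked example of the alignment below (illustrative numbers only):
	// if lastStreamPos is 1536 and lastFileSize is 100, newOffset starts at
	// 1636; adding 512-(1636%512) = 412 lands it on 2048, the next 512-byte
	// block boundary.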
	var lastFileSize, lastStreamPos int64
	tr := tar.NewReader(into)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		lastStreamPos, err = into.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}
		lastFileSize = hdr.Size
	}

	// we can now compute the precise location to write the new file to (I think)
	const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package)
	newOffset := lastStreamPos + lastFileSize
	newOffset += blockSize - (newOffset % blockSize) // shift to next-nearest block boundary
	_, err := into.Seek(newOffset, io.SeekStart)
	if err != nil {
		return err
	}

	tw := tar.NewWriter(into)
	defer tw.Close()

	for i, file := range files {
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}
		err = t.writeFileToArchive(ctx, tw, file)
		if err != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				log.Printf("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), err)
				continue
			}
			return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), err)
		}
	}

	return nil
}
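
// A minimal caller sketch for Insert (hedged: the path is hypothetical;
// *os.File satisfies io.ReadWriteSeeker, which Insert needs in order to
// overwrite the end-of-archive padding in place):
//
//	f, err := os.OpenFile("existing.tar", os.O_RDWR, 0)
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	err = Tar{}.Insert(ctx, f, newFiles)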

func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
	tr := tar.NewReader(sourceArchive)

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for {
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}

		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				log.Printf("[ERROR] Advancing to next file in tar archive: %v", err)
				continue
			}
			return err
		}
		if fileIsIncluded(skipDirs, hdr.Name) {
			continue
		}
		if hdr.Typeflag == tar.TypeXGlobalHeader {
			// ignore the pax global header from git-generated tarballs
			continue
		}

		info := hdr.FileInfo()
		file := FileInfo{
			FileInfo:      info,
			Header:        hdr,
			NameInArchive: hdr.Name,
			LinkTarget:    hdr.Linkname,
			Open: func() (fs.File, error) {
				return fileInArchive{io.NopCloser(tr), info}, nil
			},
		}

		err = handleFile(ctx, file)
		if errors.Is(err, fs.SkipAll) {
			// At first, I wasn't sure if fs.SkipAll implied that the rest of the entries
			// should still be iterated and just "skipped" (i.e. no-ops) or if the walk
			// should stop; both have the same net effect, one is just less efficient...
			// apparently fs.StopWalk was the preferred name, but it still became
			// fs.SkipAll because of semantics with documentation; see
			// https://github.com/golang/go/issues/47209 -- anyway, the walk should stop.
			break
		} else if errors.Is(err, fs.SkipDir) && file.IsDir() {
			skipDirs.add(hdr.Name)
		} else if err != nil {
			return fmt.Errorf("handling file: %s: %w", hdr.Name, err)
		}
	}

	return nil
}
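
// A minimal handler sketch for Extract (hedged: it only lists entries and
// skips one hypothetical directory; real extraction would create files on
// disk from each entry's Open stream):
//
//	err := Tar{}.Extract(ctx, archiveStream, func(ctx context.Context, f FileInfo) error {
//		if f.IsDir() && f.NameInArchive == "vendor/" { // hypothetical dir to skip
//			return fs.SkipDir
//		}
//		log.Println(f.NameInArchive)
//		return nil
//	})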

// Interface guards
var (
	_ Archiver      = (*Tar)(nil)
	_ ArchiverAsync = (*Tar)(nil)
	_ Extractor     = (*Tar)(nil)
	_ Inserter      = (*Tar)(nil)
)