1 Star 0 Fork 0

tym_hmm/commpress-file

加入 Gitee
与超过 1400万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
tar.go 6.78 KB
一键复制 编辑 原始数据 按行查看 历史
天蝎儿 提交于 2024-07-24 12:53 +08:00 . edit
package archiver
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"io/fs"
"log"
"path"
"strings"
)
// init registers a zero-value Tar with RegisterFormat when the package
// is initialized.
func init() {
RegisterFormat(Tar{})
}
// Tar is the tar archive format. Its methods create, append to, and
// read tar streams using the standard library's archive/tar package.
type Tar struct {
// NumericUIDGID, if true, preserves only the numeric user and group
// IDs: the user and group names are cleared from every header written.
NumericUIDGID bool
// ContinueOnError, if true, causes errors encountered during reading
// or writing a file within an archive to be logged, and the operation
// to continue on the remaining files. Context errors always abort.
ContinueOnError bool
}
// Name returns the name of the format, which is its file extension
// including the leading dot.
func (Tar) Name() string {
	const extension = ".tar"
	return extension
}
// Match reports whether the input looks like a tar archive.
//
// ByName is set when the lowercased filename contains the format name
// anywhere in it. ByStream is set when stream is non-nil and a tar
// reader can successfully read a first header from it. The returned
// error is always nil.
func (t Tar) Match(filename string, stream io.Reader) (MatchResult, error) {
	var result MatchResult

	// filename check: case-insensitive substring search for the extension
	lowered := strings.ToLower(filename)
	if strings.Contains(lowered, t.Name()) {
		result.ByName = true
	}

	// stream check: the stream matches if its first header parses cleanly
	if stream != nil {
		if _, err := tar.NewReader(stream).Next(); err == nil {
			result.ByStream = true
		}
	}

	return result, nil
}
// Archive writes files to output as a tar stream.
//
// Each file is written with writeFileToArchive. If a file fails and
// ContinueOnError is set (and the context has not been cancelled), the
// error is logged and the remaining files are still processed;
// otherwise the first error is returned.
//
// The tar writer is closed before returning. Closing flushes padding
// and writes the end-of-archive footer, so a failure there means the
// archive is incomplete; that error is returned unless an earlier error
// is already being reported.
func (t Tar) Archive(ctx context.Context, output io.Writer, files []File) (err error) {
	tw := tar.NewWriter(output)
	defer func() {
		// do not silently drop a failed footer write, but never mask
		// the error that caused us to bail out in the first place
		if closeErr := tw.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	for _, file := range files {
		if werr := t.writeFileToArchive(ctx, tw, file); werr != nil {
			if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort
				log.Printf("[ERROR] %v", werr)
				continue
			}
			return werr
		}
	}

	return nil
}
// ArchiveAsync writes files to output as a tar stream, taking the files
// from the jobs channel until it is closed.
//
// The outcome of writing each job's file (nil on success) is sent on
// that job's Result channel; per-file failures do not stop the loop.
//
// The tar writer is closed once jobs is drained. Closing flushes
// padding and writes the end-of-archive footer; if that fails, the
// error is returned so the caller knows the archive is incomplete.
func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) (err error) {
	tw := tar.NewWriter(output)
	defer func() {
		// surface a failed footer write instead of discarding it
		if closeErr := tw.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	for job := range jobs {
		job.Result <- t.writeFileToArchive(ctx, tw, job.File)
	}

	return nil
}
// writeFileToArchive appends a single entry for file to tw.
//
// It returns the context's error if ctx is already done. Otherwise it
// builds a header from the file's info and link target, writes the
// header, and then, for regular files only, copies the file's content
// via openAndCopyFile. Failures are wrapped with the file's name in
// the archive and the step that failed.
func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error {
	// bail out before doing any work if the caller has given up
	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr
	}

	header, err := tar.FileInfoHeader(file, file.LinkTarget)
	if err != nil {
		return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err)
	}

	// FileInfoHeader() only fills in the base name, so use the complete
	// in-archive path, falling back to the file's own name if none is set
	entryName := file.NameInArchive
	if entryName == "" {
		entryName = file.Name()
	}
	header.Name = entryName

	// dropping the names leaves only the numeric IDs in the header
	if t.NumericUIDGID {
		header.Uname, header.Gname = "", ""
	}

	if err = tw.WriteHeader(header); err != nil {
		return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err)
	}

	// only regular files carry a body; directories, links, and the like
	// are fully described by their header
	if header.Typeflag == tar.TypeReg {
		if err = openAndCopyFile(file, tw); err != nil {
			return fmt.Errorf("file %s: writing data: %w", file.NameInArchive, err)
		}
	}

	return nil
}
// Insert appends files to the existing tar archive in into.
//
// The archive is scanned from the current read position to locate the
// end of the last entry's content; the new entries are then written
// starting at the next 512-byte block boundary, overwriting any
// end-of-archive padding, and a fresh footer is written on close.
//
// If a file fails and ContinueOnError is set (and the context has not
// been cancelled), the error is logged and the remaining files are
// still processed; otherwise the error is returned. An error from
// closing the tar writer (which writes the footer) is returned unless
// an earlier error is already being reported.
func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) (err error) {
	// Tar files may end with some, none, or a lot of zero-byte padding. The spec says
	// it should end with two 512-byte trailer records consisting solely of null/0
	// bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However,
	// in my experiments using the `tar` command, I've found that is not the case,
	// and Colin Percival (author of tarsnap) confirmed this:
	// - https://twitter.com/cperciva/status/1476774314623913987
	// - https://twitter.com/cperciva/status/1476776999758663680
	// So while this solution on Stack Overflow makes sense if you control the
	// writer: https://stackoverflow.com/a/18330903/1048862 - and I did get it
	// to work in that case -- it is not a general solution. Seems that the only
	// reliable thing to do is scan the entire archive to find the last file,
	// read its size, then use that to compute the end of content and thus the
	// true length of end-of-archive padding. This is slightly more complex than
	// just adding the size of the last file to the current stream/seek position,
	// because content is padded out to whole 512-byte blocks, so the next header
	// must start on a block boundary.
	//
	// Another option is to scan the file for the last contiguous series of 0s,
	// without interpreting the tar format at all, and to find the nearest
	// blocksize-offset and start writing there. Problem is that you wouldn't
	// know if you just overwrote some of the last file if it ends with all 0s.
	var lastFileSize, lastStreamPos int64
	tr := tar.NewReader(into)
	for {
		hdr, nextErr := tr.Next()
		if nextErr == io.EOF {
			break
		}
		if nextErr != nil {
			return nextErr
		}
		// position just past the header of the entry we were handed,
		// i.e. where its content begins
		lastStreamPos, err = into.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}
		lastFileSize = hdr.Size
	}

	// Compute where the last entry's content ends and round UP to a block
	// boundary. If the end is already aligned (empty archive, or a last entry
	// whose size is a multiple of the block size, such as a directory) it must
	// stay put: skipping a whole extra block would leave a lone zero block in
	// front of the new header, which tar readers reject or treat as the end.
	const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package)
	newOffset := lastStreamPos + lastFileSize
	if rem := newOffset % blockSize; rem != 0 {
		newOffset += blockSize - rem
	}
	if _, err = into.Seek(newOffset, io.SeekStart); err != nil {
		return err
	}

	tw := tar.NewWriter(into)
	defer func() {
		// Close writes the new end-of-archive footer; report its failure
		// unless we are already returning a more specific error
		if closeErr := tw.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	for i, file := range files {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr // honor context cancellation
		}
		if werr := t.writeFileToArchive(ctx, tw, file); werr != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				log.Printf("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), werr)
				continue
			}
			return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), werr)
		}
	}

	return nil
}
// Extract reads the tar stream in sourceArchive and calls handleFile
// for each entry that passes the pathsInArchive filter (as decided by
// fileIsIncluded) and is not under a directory previously skipped.
// Pax global headers are ignored.
//
// The File passed to handleFile reads its content directly from the
// shared tar reader, so the content must be consumed before handleFile
// returns; the next iteration advances the reader.
//
// handleFile may return fs.SkipAll to stop extraction without error,
// or fs.SkipDir to skip the rest of the entry's directory (the entry
// itself if it is a directory, otherwise its parent). Any other error
// aborts extraction and is returned wrapped with the entry name.
//
// Errors from advancing the reader are returned, with one exception:
// when ContinueOnError is set, an entry rejected with
// tar.ErrInsecurePath is logged and skipped.
func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
	tr := tar.NewReader(sourceArchive)

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for {
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}

		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Only ErrInsecurePath leaves the reader able to advance to the
			// next entry. Every other error is remembered by the reader and
			// returned again on each subsequent call, so "continuing" would
			// spin forever logging the same failure.
			if t.ContinueOnError && ctx.Err() == nil && errors.Is(err, tar.ErrInsecurePath) {
				log.Printf("[ERROR] Advancing to next file in tar archive: %v", err)
				continue
			}
			return err
		}

		if !fileIsIncluded(pathsInArchive, hdr.Name) {
			continue
		}
		if fileIsIncluded(skipDirs, hdr.Name) {
			continue
		}
		if hdr.Typeflag == tar.TypeXGlobalHeader {
			// ignore the pax global header from git-generated tarballs
			continue
		}

		file := File{
			FileInfo:      hdr.FileInfo(),
			Header:        hdr,
			NameInArchive: hdr.Name,
			LinkTarget:    hdr.Linkname,
			// content comes straight from the tar reader at its current entry
			Open: func() (io.ReadCloser, error) { return io.NopCloser(tr), nil },
		}

		err = handleFile(ctx, file)
		if errors.Is(err, fs.SkipAll) {
			break
		} else if errors.Is(err, fs.SkipDir) {
			// if a directory, skip this path; if a file, skip the folder path
			dirPath := hdr.Name
			if hdr.Typeflag != tar.TypeDir {
				dirPath = path.Dir(hdr.Name) + "/"
			}
			skipDirs.add(dirPath)
		} else if err != nil {
			return fmt.Errorf("handling file: %s: %w", hdr.Name, err)
		}
	}

	return nil
}
// Interface guards: compile-time assertions that *Tar satisfies each of
// the listed interfaces. They have no runtime effect.
var (
_ Archiver = (*Tar)(nil)
_ ArchiverAsync = (*Tar)(nil)
_ Extractor = (*Tar)(nil)
_ Inserter = (*Tar)(nil)
)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/tym_hmm/commpress-file.git
git@gitee.com:tym_hmm/commpress-file.git
tym_hmm
commpress-file
commpress-file
v1.0.4

搜索帮助