From 92e6d1bae6a7a1029fb973e2e14e59f759f182db Mon Sep 17 00:00:00 2001 From: Shane C Date: Fri, 1 Nov 2024 12:34:12 -0400 Subject: [PATCH] initial commit --- .gitignore | 7 + LICENSE | 10 + README.md | 9 + archive.go | 180 +++++++++++++++++ archive_test.go | 499 ++++++++++++++++++++++++++++++++++++++++++++++++ extract.go | 359 ++++++++++++++++++++++++++++++++++ go.mod | 16 ++ go.sum | 27 +++ open.go | 188 ++++++++++++++++++ read.go | 98 ++++++++++ 10 files changed, 1393 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 archive.go create mode 100644 archive_test.go create mode 100644 extract.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 open.go create mode 100644 read.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1539299 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# IDE +.vscode +.idea +.fleet + +# Tests +test \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cde4ac6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,10 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org> diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9e9310 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# Archiver +A Go library for extracting and creating archives. + +## Supported Formats +- `.tar.gz` +- `.tar.bz2` +- `.tar.xz` +- `.tar` +- `.zip` \ No newline at end of file diff --git a/archive.go b/archive.go new file mode 100644 index 0000000..e27779f --- /dev/null +++ b/archive.go @@ -0,0 +1,180 @@ +package archiver + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "embed" + "errors" + "github.com/go-git/go-billy/v5" + "io/fs" + "os" + "regexp" +) + +type Type string + +var ( + ErrArchiveTypeNotSupported = errors.New("archive type not supported") + ErrArchiveFileNotFound = errors.New("archive file not found") +) + +const ( + TarGzip Type = "tar.gz" + TarBzip = "tar.bz2" + TarXz = "tar.xz" + Tar = "tar" + Zip = "zip" +) + +type Archive struct { + // Type of Archive + Type Type + // Path of Archive file + Path string + reader interface{} + tarReader *tar.Reader // Used for anything with .tar due to how tar.Reader cannot be reset. + files map[string]*File + archiveFile *bytes.Reader +} + +// Filesystem represents a standard interface for filesystems. +// Currently, supports fs.FS and billy.Filesystem. +type Filesystem struct { + billyFS billy.Filesystem + fs fs.FS + embed *embed.FS + file bool + path string +} + +// WrapBillyFS wraps billy.Filesystem to the Filesystem interface. +// Returns Filesystem. 
+func WrapBillyFS(filesystem billy.Filesystem) Filesystem { + return Filesystem{ + billyFS: filesystem, + } +} + +// WrapFS wraps fs.FS to the Filesystem interface. +// Returns Filesystem. +func WrapFS(filesystem fs.FS) Filesystem { + return Filesystem{ + fs: filesystem, + } +} + +// WrapPath wraps a path to the Filesystem interface. +// Returns Filesystem. +func WrapPath(path string) Filesystem { + return Filesystem{ + path: path, + } +} + +// ExtractOptions represents options for extracting an Archive +type ExtractOptions struct { + // Whether to overwrite files if they already exist. + Overwrite bool + // Folder to extract Archive to. + Folder string + // Whether to preserve the file structure in the Archive or not. + NotPreserveFileStructure bool + // Optional regex filter for specific files. + Filter *regexp.Regexp +} + +// ExtractFileOptions represents options for extracting an ArchiveFile +type ExtractFileOptions struct { + // Whether to overwrite files if they already exist. + Overwrite bool + // Perms for output file, default is the permission in the Archive. + Perms os.FileMode + // Folder to extract Archive to. + Folder string + // Whether to preserve the file structure in the Archive or not. + NotPreserveFileStructure bool +} + +// File represents a file in an archive. +type File struct { + // Name of file (e.g "test.txt") + FileName string + // Path of file in archive (e.g "folder/test.txt") + Path string + archive *Archive +} + +// GetFile gets a File from the Archive +// It takes the path of the file in the Archive as its parameter. +// The function returns ArchiveFile and an error, if any. +func (a *Archive) GetFile(path string) (*File, error) { + file, ok := a.files[path] + if !ok { + return nil, ErrArchiveFileNotFound + } + + return file, nil +} + +// FileCount gets the total file count in the Archive. +// The function returns an integer. 
+func (a *Archive) FileCount() int { + return len(a.files) +} + +// ExtractBillyFS extracts the specified Archive into billy.Filesystem. +// It takes billy.Filesystem and ExtractOptions as its parameters. +// The function returns an error, if any. +func (a *Archive) ExtractBillyFS(filesystem billy.Filesystem, opts ExtractOptions) error { + return extract(WrapBillyFS(filesystem), opts, a) +} + +// Extract extracts the specified Archive to a specified directory. +// It takes ExtractOptions as its parameter +// The function returns an error, if any. +func (a *Archive) Extract(opts ExtractOptions) error { + return extract(Filesystem{file: true}, opts, a) +} + +// Close closes the Archive. +// The function returns an error, if any. +func (a *Archive) Close() error { + a.archiveFile = nil + + switch a.Type { + case TarGzip: + if err := a.reader.(*gzip.Reader).Close(); err != nil { + return err + } + } + + return nil +} + +// ExtractBillyFS extracts the specified File in the archive into billy.Filesystem. +// It takes billy.Filesystem and ExtractFileOptions as its parameters. +// The function returns an error, if any. +func (f *File) ExtractBillyFS(filesystem billy.Filesystem, opts ExtractFileOptions) error { + return extractFile(WrapBillyFS(filesystem), opts, f) +} + +// Extract extracts the specified File in the archive to a specified directory. +// It takes ExtractFileOptions as its parameter. +// The function returns an error, if any. +func (f *File) Extract(opts ExtractFileOptions) error { + return extractFile(Filesystem{file: true}, opts, f) +} + +// Read reads the specified File in the archive and returns the content. +// The function returns the content of the File, and an error, if any. 
+func (f *File) Read() ([]byte, error) { + switch f.archive.Type { + case Tar, TarGzip, TarBzip, TarXz: + return tarRead(f) + case Zip: + return zipRead(f) + default: + return nil, ErrArchiveTypeNotSupported + } +} diff --git a/archive_test.go b/archive_test.go new file mode 100644 index 0000000..24a5d74 --- /dev/null +++ b/archive_test.go @@ -0,0 +1,499 @@ +package archiver + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "github.com/dsnet/compress/bzip2" + "github.com/go-git/go-billy/v5/memfs" + "github.com/stretchr/testify/assert" + "github.com/ulikunitz/xz" + "os" + "regexp" + "strconv" + "testing" +) + +const ( + testArchiveBaseDir = "./test/archives" +) + +var archiveRegex = regexp.MustCompile(`(?m)test[1|5]`) + +func TestArchiveZip(t *testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateZip(t) + + t.Log("== OpenArchive ==") + archive, err := Open(Zip, testArchiveBaseDir+"/test.zip") + assert.NoError(t, err) + assert.Equal(t, archive.FileCount(), 10) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/zip", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/zip") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/zip", + }) + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/zip", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/zip/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/zip/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/zip") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestArchiveTarGzip(t 
*testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateTarGzip(t) + + t.Log("== OpenArchive ==") + archive, err := Open(TarGzip, testArchiveBaseDir+"/test.tar.gz") + assert.NoError(t, err) + assert.Equal(t, archive.FileCount(), 10) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/gz", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/gz") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/gz", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/gz") + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/gz", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/gz/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/gz/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/gz") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestArchiveTarBzip(t *testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateTarBzip(t) + + t.Log("== OpenArchive ==") + archive, err := Open(TarBzip, testArchiveBaseDir+"/test.tar.bz2") + assert.NoError(t, err) + assert.Equal(t, archive.FileCount(), 10) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/bzip", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/bzip") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = 
archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/bzip", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/bzip") + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/bzip", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/bzip/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/bzip/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/bzip") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestArchiveTarXz(t *testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateTarXz(t) + + t.Log("== OpenArchive ==") + archive, err := Open(TarXz, testArchiveBaseDir+"/test.tar.xz") + assert.NoError(t, err) + assert.Equal(t, archive.FileCount(), 10) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/xz", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/xz") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/xz", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/xz") + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/xz", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/xz/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/xz/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/xz") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestArchiveTar(t *testing.T) { + err 
:= os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateTar(t) + + t.Log("== OpenArchive ==") + archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") + assert.NoError(t, err) + assert.Equal(t, 10, archive.FileCount()) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/tar", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/tar") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/tar", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/tar") + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/tar", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/tar/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/tar/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/tar") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestArchiveFS(t *testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + + memoryFS := memfs.New() + + archiveF, err := memoryFS.OpenFile("archive.tar.gz", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm) + assert.NoError(t, err) + defer archiveF.Close() + + gzipWriter := gzip.NewWriter(archiveF) + defer gzipWriter.Close() + + tarWriter := tar.NewWriter(gzipWriter) + defer tarWriter.Close() + + for i := 0; i < 10; i++ { + tarHeader := &tar.Header{ + Name: "test" + strconv.Itoa(i) + ".txt", + Size: int64(len([]byte("Hello, World! 
#" + strconv.Itoa(i)))), + Mode: 0600, + } + err = tarWriter.WriteHeader(tarHeader) + assert.NoError(t, err) + + _, err = tarWriter.Write([]byte("Hello, World! #" + strconv.Itoa(i))) + assert.NoError(t, err) + } + + err = tarWriter.Close() + assert.NoError(t, err) + err = gzipWriter.Close() + assert.NoError(t, err) + err = archiveF.Close() + assert.NoError(t, err) + + t.Log("== OpenArchive ==") + archive, err := OpenFS(WrapBillyFS(memoryFS), TarGzip, "archive.tar.gz") + assert.NoError(t, err) + assert.Equal(t, 10, archive.FileCount()) + + t.Log("== ExtractArchiveFile ==") + archiveFile, err := archive.GetFile("test0.txt") + assert.NoError(t, err) + + err = archiveFile.Extract(ExtractFileOptions{ + Folder: testArchiveBaseDir + "/extracted/fs", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/fs") + assert.NoError(t, err) + + t.Log("== ExtractArchive ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/fs", + }) + assert.NoError(t, err) + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/fs") + assert.NoError(t, err) + + t.Log("== ExtractArchiveFilter ==") + err = archive.Extract(ExtractOptions{ + Folder: testArchiveBaseDir + "/extracted/fs", + Filter: archiveRegex, + }) + + assert.FileExists(t, testArchiveBaseDir+"/extracted/fs/test1.txt") + assert.FileExists(t, testArchiveBaseDir+"/extracted/fs/test5.txt") + + err = os.RemoveAll(testArchiveBaseDir + "/extracted/fs") + assert.NoError(t, err) + + err = archive.Close() + assert.NoError(t, err) +} + +func TestExtractArchiveBillyFS(t *testing.T) { + err := os.MkdirAll(testArchiveBaseDir, os.ModePerm) + assert.NoError(t, err) + testGenerateTar(t) + + archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") + assert.NoError(t, err) + + memoryFS := memfs.New() + + err = archive.ExtractBillyFS(memoryFS, ExtractOptions{ + Folder: memoryFS.Root(), + }) + assert.NoError(t, err) + + files, err := memoryFS.ReadDir(memoryFS.Root()) + assert.NoError(t, 
err) + + assert.Equal(t, 10, len(files)) + + for _, file := range files { + assert.NotEqual(t, int64(0), file.Size()) + } +} + +func testGenerateTar(t *testing.T) { + tarFile, err := os.Create(testArchiveBaseDir + "/test.tar") + defer tarFile.Close() + assert.NoError(t, err) + + tarWriter := tar.NewWriter(tarFile) + defer tarWriter.Close() + + for i := 0; i < 10; i++ { + tarHeader := &tar.Header{ + Name: "test" + strconv.Itoa(i) + ".txt", + Size: int64(len([]byte("Hello, World! #" + strconv.Itoa(i)))), + Mode: 0600, + } + err = tarWriter.WriteHeader(tarHeader) + assert.NoError(t, err) + + _, err = tarWriter.Write([]byte("Hello, World! #" + strconv.Itoa(i))) + assert.NoError(t, err) + } + + err = tarWriter.Close() + assert.NoError(t, err) + err = tarFile.Close() + assert.NoError(t, err) +} + +func testGenerateTarXz(t *testing.T) { + tarXzFile, err := os.Create(testArchiveBaseDir + "/test.tar.xz") + defer tarXzFile.Close() + assert.NoError(t, err) + + xzWriter, err := xz.NewWriter(tarXzFile) + defer xzWriter.Close() + assert.NoError(t, err) + + tarWriter := tar.NewWriter(xzWriter) + defer tarWriter.Close() + + for i := 0; i < 10; i++ { + tarHeader := &tar.Header{ + Name: "test" + strconv.Itoa(i) + ".txt", + Size: int64(len([]byte("Hello, World! #" + strconv.Itoa(i)))), + Mode: 0600, + } + err = tarWriter.WriteHeader(tarHeader) + assert.NoError(t, err) + + _, err = tarWriter.Write([]byte("Hello, World! 
#" + strconv.Itoa(i))) + assert.NoError(t, err) + } + + err = tarWriter.Close() + assert.NoError(t, err) + err = xzWriter.Close() + assert.NoError(t, err) + err = tarXzFile.Close() + assert.NoError(t, err) +} + +func testGenerateTarBzip(t *testing.T) { + tarBzFile, err := os.Create(testArchiveBaseDir + "/test.tar.bz2") + defer tarBzFile.Close() + assert.NoError(t, err) + + bzipWriter, err := bzip2.NewWriter(tarBzFile, &bzip2.WriterConfig{}) + defer bzipWriter.Close() + assert.NoError(t, err) + + tarWriter := tar.NewWriter(bzipWriter) + defer tarWriter.Close() + + for i := 0; i < 10; i++ { + tarHeader := &tar.Header{ + Name: "test" + strconv.Itoa(i) + ".txt", + Size: int64(len([]byte("Hello, World! #" + strconv.Itoa(i)))), + Mode: 0600, + } + err = tarWriter.WriteHeader(tarHeader) + assert.NoError(t, err) + + _, err = tarWriter.Write([]byte("Hello, World! #" + strconv.Itoa(i))) + assert.NoError(t, err) + } + + err = tarWriter.Close() + assert.NoError(t, err) + err = bzipWriter.Close() + assert.NoError(t, err) + err = tarBzFile.Close() + assert.NoError(t, err) +} + +func testGenerateTarGzip(t *testing.T) { + tarGzFile, err := os.Create(testArchiveBaseDir + "/test.tar.gz") + defer tarGzFile.Close() + assert.NoError(t, err) + + gzipWriter := gzip.NewWriter(tarGzFile) + defer gzipWriter.Close() + + tarWriter := tar.NewWriter(gzipWriter) + defer tarWriter.Close() + + for i := 0; i < 10; i++ { + tarHeader := &tar.Header{ + Name: "test" + strconv.Itoa(i) + ".txt", + Size: int64(len([]byte("Hello, World! #" + strconv.Itoa(i)))), + Mode: 0600, + } + err = tarWriter.WriteHeader(tarHeader) + assert.NoError(t, err) + + _, err = tarWriter.Write([]byte("Hello, World! 
#" + strconv.Itoa(i))) + assert.NoError(t, err) + } + + err = tarWriter.Close() + assert.NoError(t, err) + err = gzipWriter.Close() + assert.NoError(t, err) + err = tarGzFile.Close() + assert.NoError(t, err) +} + +func testGenerateZip(t *testing.T) { + zipFile, err := os.Create(testArchiveBaseDir + "/test.zip") + defer zipFile.Close() + assert.NoError(t, err) + + zipWriter := zip.NewWriter(zipFile) + for i := 0; i < 10; i++ { + file, err := zipWriter.Create("test" + strconv.Itoa(i) + ".txt") + assert.NoError(t, err) + _, err = file.Write([]byte("Hello, World! #" + strconv.Itoa(i))) + assert.NoError(t, err) + } + err = zipWriter.Close() + assert.NoError(t, err) + err = zipFile.Close() + assert.NoError(t, err) +} diff --git a/extract.go b/extract.go new file mode 100644 index 0000000..0b17069 --- /dev/null +++ b/extract.go @@ -0,0 +1,359 @@ +package archiver + +import ( + "archive/tar" + "archive/zip" + "errors" + "io" + "io/fs" + "os" + "path/filepath" + "regexp" + "strings" +) + +type extractOptions struct { + Perms os.FileMode + Overwrite bool + Folder string + NotPreserveFileStructure bool + Filter *regexp.Regexp + File *File +} + +func extract(filesystem Filesystem, opts ExtractOptions, archive *Archive) error { + if filesystem.billyFS == nil { + filesystem.file = true + } + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(opts.Folder, os.ModePerm); err != nil { + return err + } + } else if filesystem.file { + if err := os.MkdirAll(opts.Folder, os.ModePerm); err != nil { + return err + } + } + + extOptions := extractOptions{ + Folder: opts.Folder, + NotPreserveFileStructure: opts.NotPreserveFileStructure, + Overwrite: opts.Overwrite, + Filter: opts.Filter, + } + + switch archive.Type { + case Tar, TarGzip, TarBzip, TarXz: + return tarExtract(filesystem, extOptions, archive) + case Zip: + return zipExtract(filesystem, extOptions, archive) + default: + return ErrArchiveTypeNotSupported + } +} + +func 
extractFile(filesystem Filesystem, opts ExtractFileOptions, file *File) error { + if filesystem.billyFS == nil { + filesystem.file = true + } + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(opts.Folder, os.ModePerm); err != nil { + return err + } + } else if filesystem.file { + if err := os.MkdirAll(opts.Folder, os.ModePerm); err != nil { + return err + } + } + + extOptions := extractOptions{ + Folder: opts.Folder, + NotPreserveFileStructure: opts.NotPreserveFileStructure, + Overwrite: opts.Overwrite, + File: file, + } + + switch file.archive.Type { + case Tar, TarGzip, TarBzip, TarXz: + return tarExtract(filesystem, extOptions, file.archive) + case Zip: + return zipExtract(filesystem, extOptions, file.archive) + default: + return ErrArchiveTypeNotSupported + } +} + +func zipExtract(filesystem Filesystem, opts extractOptions, archive *Archive) error { + if filesystem.billyFS == nil { + filesystem.file = true + } + for _, zipF := range archive.reader.(*zip.Reader).File { + + var splitPath []string + if opts.File != nil { + splitPath = strings.Split(opts.File.Path, "/") + } else { + splitPath = strings.Split(zipF.Name, "/") + } + splitPath = splitPath[:len(splitPath)-1] + + if !opts.NotPreserveFileStructure && zipF.FileInfo().IsDir() { + if opts.File != nil { + isFound := false + for _, folder := range splitPath { + if folder == filepath.Base(zipF.Name) { + isFound = true + break + } + } + if isFound { + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(filepath.Join(opts.Folder, zipF.Name), os.ModePerm); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Join(opts.Folder, zipF.Name), os.ModePerm); err != nil { + return err + } + } + + continue + } + } else { + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(filepath.Join(opts.Folder, zipF.Name), os.ModePerm); err != nil { + return err + } + } else { + if err := 
os.MkdirAll(filepath.Join(opts.Folder, zipF.Name), os.ModePerm); err != nil { + return err + } + } + continue + } + } + + if opts.File != nil && zipF.Name != opts.File.Path { + continue + } + + if opts.File == nil && opts.Filter != nil { + if !opts.Filter.MatchString(zipF.Name) { + continue + } + } + + var filePath string + if !opts.NotPreserveFileStructure { + filePath = filepath.Join(opts.Folder, zipF.Name) + } else { + filePath = filepath.Join(opts.Folder, filepath.Base(zipF.Name)) + } + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if _, err := billyFS.Stat(filepath.Join(opts.Folder, filepath.Dir(zipF.Name))); err != nil { + if errors.Is(err, fs.ErrNotExist) { + if err := billyFS.MkdirAll(filepath.Join(opts.Folder, filepath.Dir(zipF.Name)), os.ModePerm); err != nil { + return err + } + } else { + return err + } + } + } else { + if _, err := os.Stat(filepath.Join(opts.Folder, filepath.Dir(zipF.Name))); err != nil { + if errors.Is(err, fs.ErrNotExist) { + if err := os.MkdirAll(filepath.Join(opts.Folder, filepath.Dir(zipF.Name)), os.ModePerm); err != nil { + return err + } + } else { + return err + } + } + } + + if opts.Perms == 0 { + opts.Perms = zipF.FileInfo().Mode() + } + + var file io.WriteCloser + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + var err error + file, err = billyFS.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, opts.Perms) + if err != nil { + return nil + } + } else { + var err error + file, err = os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, opts.Perms) + if err != nil { + return err + } + } + + archiveFile, err := zipF.Open() + if err != nil { + return err + } + + if _, err := io.Copy(file, archiveFile); err != nil { + return err + } + + if err := archiveFile.Close(); err != nil { + return err + } + + if err := file.Close(); err != nil { + return err + } + + if opts.File != nil && zipF.Name == opts.File.Path { + break + } + } + return nil +} + +func tarExtract(filesystem Filesystem, 
opts extractOptions, archive *Archive) error { + defer tarCleanup(archive) + if filesystem.billyFS == nil { + filesystem.file = true + } + for { + header, err := archive.tarReader.Next() + if err != nil && err != io.EOF { + return err + } else if err == io.EOF { + break + } + + var splitPath []string + if opts.File != nil { + splitPath = strings.Split(opts.File.Path, "/") + } else { + splitPath = strings.Split(header.Name, "/") + } + splitPath = splitPath[:len(splitPath)-1] + + if !opts.NotPreserveFileStructure && header.Typeflag == tar.TypeDir { + if opts.File != nil { + isFound := false + for _, folder := range splitPath { + if folder == filepath.Base(header.Name) { + isFound = true + break + } + } + if isFound { + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(filepath.Join(opts.Folder, header.Name), os.ModePerm); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Join(opts.Folder, header.Name), os.ModePerm); err != nil { + return err + } + } + continue + } + } else { + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if err := billyFS.MkdirAll(filepath.Join(opts.Folder, header.Name), os.ModePerm); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Join(opts.Folder, header.Name), os.ModePerm); err != nil { + return err + } + } + continue + } + + } + + if opts.File != nil && header.Name != opts.File.Path { + continue + } + + if opts.File == nil && opts.Filter != nil { + if !opts.Filter.MatchString(header.Name) { + continue + } + } + + var filePath string + if !opts.NotPreserveFileStructure { + filePath = filepath.Join(opts.Folder, header.Name) + } else { + filePath = filepath.Join(opts.Folder, filepath.Base(header.Name)) + } + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + if _, err := billyFS.Stat(filepath.Join(opts.Folder, filepath.Dir(header.Name))); err != nil { + if errors.Is(err, fs.ErrNotExist) { + if err := 
billyFS.MkdirAll(filepath.Join(opts.Folder, filepath.Dir(header.Name)), os.ModePerm); err != nil { + return err + } + } else { + return err + } + } + } else { + if _, err := os.Stat(filepath.Join(opts.Folder, filepath.Dir(header.Name))); err != nil { + if errors.Is(err, fs.ErrNotExist) { + if err := os.MkdirAll(filepath.Join(opts.Folder, filepath.Dir(header.Name)), os.ModePerm); err != nil { + return err + } + } else { + return err + } + } + } + + if opts.Perms == 0 { + opts.Perms = header.FileInfo().Mode() + } + var file io.WriteCloser + + if filesystem.billyFS != nil { + billyFS := filesystem.billyFS + var err error + file, err = billyFS.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, opts.Perms) + if err != nil { + return nil + } + } else { + var err error + file, err = os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, opts.Perms) + if err != nil { + return err + } + } + + if _, err = io.Copy(file, archive.tarReader); err != nil { + return err + } + + if err := file.Close(); err != nil { + return err + } + + if opts.File != nil && header.Name == opts.File.Path { + break + } + } + return nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3532cbc --- /dev/null +++ b/go.mod @@ -0,0 +1,16 @@ +module gitlab.com/omnibill/archiver + +go 1.23.2 + +require ( + github.com/dsnet/compress v0.0.1 + github.com/go-git/go-billy/v5 v5.6.0 + github.com/stretchr/testify v1.9.0 + github.com/ulikunitz/xz v0.5.12 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..84fd9e3 --- /dev/null +++ b/go.sum @@ -0,0 +1,27 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= 
+github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= +github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/go-git/go-billy/v5 v5.6.0 h1:w2hPNtoehvJIxR00Vb4xX94qHQi/ApZfX+nBE2Cjio8= +github.com/go-git/go-billy/v5 v5.6.0/go.mod h1:sFDq7xD3fn3E0GOwUSZqHo9lrkmx8xJhA0ZrfvjBRGM= +github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= +github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= +github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/open.go b/open.go
new file mode 100644
index 0000000..38ae046
--- /dev/null
+++ b/open.go
@@ -0,0 +1,159 @@
+package archiver
+
+import (
+	"archive/tar"
+	"archive/zip"
+	"bytes"
+	"compress/bzip2"
+	"compress/gzip"
+	"github.com/ulikunitz/xz"
+	"io"
+	"os"
+	"path/filepath"
+)
+
+// OpenFS opens an archive file through the given Filesystem and returns an Archive struct.
+// It takes a wrapped Filesystem, a Type and the path to the archive file as its parameters.
+// The function returns an Archive struct and an error, if any.
+func OpenFS(filesystem Filesystem, archiveType Type, path string) (*Archive, error) {
+	return openArchive(filesystem, archiveType, path)
+}
+
+// Open opens an archive file and returns an Archive struct.
+// It takes a Type and a path to the archive file as its parameters.
+// The function returns an Archive struct and an error, if any.
+func Open(archiveType Type, path string) (*Archive, error) {
+	archiveFolderPath, err := filepath.Abs(filepath.Dir(path))
+	if err != nil {
+		return nil, err
+	}
+	return openArchive(WrapPath(archiveFolderPath), archiveType, path)
+}
+
+// loadArchive reads the complete archive file at path into memory through the
+// first backend set on filesystem: billy, then fs.FS, then the local disk.
+// It returns ErrArchiveFileNotFound when no backend is set.
+func loadArchive(filesystem Filesystem, path string) (*bytes.Reader, error) {
+	var (
+		source io.ReadCloser
+		err    error
+	)
+
+	switch {
+	case filesystem.billyFS != nil:
+		source, err = filesystem.billyFS.Open(path)
+	case filesystem.fs != nil:
+		source, err = filesystem.fs.Open(path)
+	case len(filesystem.path) != 0:
+		source, err = os.Open(path)
+	default:
+		return nil, ErrArchiveFileNotFound
+	}
+	if err != nil {
+		return nil, err
+	}
+	// The content is fully buffered below, so the handle must not outlive this call.
+	defer source.Close()
+
+	data, err := io.ReadAll(source)
+	if err != nil {
+		return nil, err
+	}
+	return bytes.NewReader(data), nil
+}
+
+// indexTar walks every header of the tar stream and records each entry in
+// archive.files under its full name. Once the stream is exhausted it rewinds
+// file, the in-memory archive the stream reads from, so it can be read again.
+func indexTar(archive *Archive, stream io.Reader, file *bytes.Reader) error {
+	tarReader := tar.NewReader(stream)
+	for {
+		header, err := tarReader.Next()
+		if err == io.EOF {
+			_, seekErr := file.Seek(0, io.SeekStart)
+			return seekErr
+		}
+		if err != nil {
+			return err
+		}
+
+		_, fileName := filepath.Split(header.Name)
+		archive.files[header.Name] = &File{FileName: fileName, Path: header.Name, archive: archive}
+	}
+}
+
+// openArchive loads the archive at path from filesystem into memory, indexes
+// its entries and prepares the readers matching archiveType.
+// It returns ErrArchiveFileNotFound when filesystem has no backend set and
+// ErrArchiveTypeNotSupported for an unknown archiveType.
+func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive, error) {
+	file, err := loadArchive(filesystem, path)
+	if err != nil {
+		return nil, err
+	}
+
+	archive := &Archive{files: make(map[string]*File)}
+
+	switch archiveType {
+	case TarGzip:
+		gzipReader, err := gzip.NewReader(file)
+		if err != nil {
+			return nil, err
+		}
+		if err := indexTar(archive, gzipReader, file); err != nil {
+			return nil, err
+		}
+		// A tar.Reader cannot be reset, so restart decompression and wrap it anew.
+		if err := gzipReader.Reset(file); err != nil {
+			return nil, err
+		}
+		archive.reader = gzipReader
+		archive.tarReader = tar.NewReader(gzipReader)
+	case TarBzip:
+		if err := indexTar(archive, bzip2.NewReader(file), file); err != nil {
+			return nil, err
+		}
+		bzipReader := bzip2.NewReader(file)
+		archive.reader = bzipReader
+		archive.tarReader = tar.NewReader(bzipReader)
+	case TarXz:
+		xzReader, err := xz.NewReader(file)
+		if err != nil {
+			return nil, err
+		}
+		if err := indexTar(archive, xzReader, file); err != nil {
+			return nil, err
+		}
+		xzReader, err = xz.NewReader(file)
+		if err != nil {
+			return nil, err
+		}
+		archive.reader = xzReader
+		archive.tarReader = tar.NewReader(xzReader)
+	case Tar:
+		if err := indexTar(archive, file, file); err != nil {
+			return nil, err
+		}
+		archive.reader = file
+		archive.tarReader = tar.NewReader(file)
+	case Zip:
+		zipReader, err := zip.NewReader(file, file.Size())
+		if err != nil {
+			return nil, err
+		}
+
+		archive.reader = zipReader
+		for _, entry := range zipReader.File {
+			_, fileName := filepath.Split(entry.Name)
+			archive.files[entry.Name] = &File{FileName: fileName, Path: entry.Name, archive: archive}
+		}
+	default:
+		return nil, ErrArchiveTypeNotSupported
+	}
+
+	archive.Type = archiveType
+	archive.Path = path
+	archive.archiveFile = file
+
+	return archive, nil
+}
diff --git a/read.go b/read.go
new file mode 100644
index 0000000..2ef86a4
--- /dev/null
+++ b/read.go
@@ -0,0 +1,100 @@
+package archiver
+
+import (
+	"archive/tar"
+	"archive/zip"
+	"bytes"
+	"compress/bzip2"
+	"compress/gzip"
+	"github.com/ulikunitz/xz"
+	"io"
+)
+
+// zipRead returns the uncompressed content of file from the zip archive it belongs to.
+// It returns ErrArchiveFileNotFound when the archive has no entry named file.Path.
+func zipRead(file *File) ([]byte, error) {
+	for _, zipF := range file.archive.reader.(*zip.Reader).File {
+		if file.Path != zipF.Name {
+			continue
+		}
+
+		archiveFile, err := zipF.Open()
+		if err != nil {
+			return nil, err
+		}
+		// Every path below returns, so this defer runs once and without delay.
+		defer archiveFile.Close()
+
+		var fileBuffer bytes.Buffer
+		if _, err := io.Copy(&fileBuffer, archiveFile); err != nil {
+			return nil, err
+		}
+		return fileBuffer.Bytes(), nil
+	}
+
+	return nil, ErrArchiveFileNotFound
+}
+
+// tarRead returns the content of file from the tar based archive it belongs to.
+// It scans the archive's shared tar reader for the entry named file.Path and
+// returns ErrArchiveFileNotFound when no such entry exists. The archive readers
+// are rewound afterwards in every case; a rewind failure is reported when the
+// read itself succeeded, since later reads start from the rewound reader.
+func tarRead(file *File) (data []byte, err error) {
+	defer func() {
+		if cleanupErr := tarCleanup(file.archive); cleanupErr != nil && err == nil {
+			data, err = nil, cleanupErr
+		}
+	}()
+
+	for {
+		header, nextErr := file.archive.tarReader.Next()
+		if nextErr == io.EOF {
+			return nil, ErrArchiveFileNotFound
+		}
+		if nextErr != nil {
+			return nil, nextErr
+		}
+		if file.Path != header.Name {
+			continue
+		}
+
+		var fileBuffer bytes.Buffer
+		if _, copyErr := io.Copy(&fileBuffer, file.archive.tarReader); copyErr != nil {
+			return nil, copyErr
+		}
+		return fileBuffer.Bytes(), nil
+	}
+}
+
+// tarCleanup rewinds the in-memory archive and rebuilds the decompression and
+// tar readers for the archive Type so the next read starts from the first entry.
+// Types not listed in the switch only get the rewind.
+func tarCleanup(archive *Archive) error {
+	if _, err := archive.archiveFile.Seek(0, io.SeekStart); err != nil {
+		return err
+	}
+	switch archive.Type {
+	case TarGzip:
+		gzipReader := archive.reader.(*gzip.Reader)
+		if err := gzipReader.Reset(archive.archiveFile); err != nil {
+			return err
+		}
+		archive.tarReader = tar.NewReader(gzipReader)
+	case TarBzip:
+		bzipReader := bzip2.NewReader(archive.archiveFile)
+		archive.reader = bzipReader
+		archive.tarReader = tar.NewReader(bzipReader)
+	case TarXz:
+		// Only replace the stored reader once the new one is known to be valid.
+		xzReader, err := xz.NewReader(archive.archiveFile)
+		if err != nil {
+			return err
+		}
+		archive.reader = xzReader
+		archive.tarReader = tar.NewReader(xzReader)
+	case Tar:
+		archive.tarReader = tar.NewReader(archive.reader.(*bytes.Reader))
+	}
+	return nil
+}