automatically get archive type via magic bytes

This commit is contained in:
Shane C. 2024-12-03 12:32:15 -05:00
parent 939d31dc37
commit 0152c92949
Signed by: Shane C.
GPG key ID: E46B5FEA35B22FF9
4 changed files with 120 additions and 26 deletions

View file

@ -4,7 +4,7 @@ A golang library for extracting and creating archives.
[Documentation Link](https://pkg.go.dev/egtyl.xyz/omnibill/archiver) [Documentation Link](https://pkg.go.dev/egtyl.xyz/omnibill/archiver)
## Roadmap ## Roadmap
- [ ] Automatically detect the archive format - [x] Automatically detect the archive format
- [x] Add mutexes to work with concurrency - [x] Add mutexes to work with concurrency
## Supported Formats ## Supported Formats

View file

@ -47,7 +47,7 @@ func TestArchiveFS(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := OpenFS(WrapBillyFS(memoryFS), TarGzip, "archive.tar.gz") archive, err := OpenFS(WrapBillyFS(memoryFS), "archive.tar.gz")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, 10, archive.FileCount()) assert.Equal(t, 10, archive.FileCount())
@ -93,7 +93,7 @@ func TestExtractArchiveBillyFS(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
testGenerateTar(t) testGenerateTar(t)
archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") archive, err := Open(testArchiveBaseDir + "/test.tar")
assert.NoError(t, err) assert.NoError(t, err)
memoryFS := memfs.New() memoryFS := memfs.New()

View file

@ -26,7 +26,7 @@ func TestArchiveConcurrency(t *testing.T) {
testGenerateZip(t) testGenerateZip(t)
archive, err := Open(Zip, testArchiveBaseDir+"/test.zip") archive, err := Open(testArchiveBaseDir + "/test.zip")
assert.NoError(t, err) assert.NoError(t, err)
var wg sync.WaitGroup var wg sync.WaitGroup
@ -87,31 +87,31 @@ func TestArchiveExtract(t *testing.T) {
TestArchiveTarGzip(t) TestArchiveTarGzip(t)
t.Log("== ExtractZip ==") t.Log("== ExtractZip ==")
err = Extract(Zip, testArchiveBaseDir+"/test.zip", ExtractOptions{ err = Extract(testArchiveBaseDir+"/test.zip", ExtractOptions{
Folder: testArchiveBaseDir + "/extracted/zip", Folder: testArchiveBaseDir + "/extracted/zip",
}) })
assert.NoError(t, err) assert.NoError(t, err)
t.Log("== ExtractTar ==") t.Log("== ExtractTar ==")
err = Extract(Tar, testArchiveBaseDir+"/test.tar", ExtractOptions{ err = Extract(testArchiveBaseDir+"/test.tar", ExtractOptions{
Folder: testArchiveBaseDir + "/extracted/tar", Folder: testArchiveBaseDir + "/extracted/tar",
}) })
assert.NoError(t, err) assert.NoError(t, err)
t.Log("== ExtractTarXz ==") t.Log("== ExtractTarXz ==")
err = Extract(TarXz, testArchiveBaseDir+"/test.tar.xz", ExtractOptions{ err = Extract(testArchiveBaseDir+"/test.tar.xz", ExtractOptions{
Folder: testArchiveBaseDir + "/extracted/xz", Folder: testArchiveBaseDir + "/extracted/xz",
}) })
assert.NoError(t, err) assert.NoError(t, err)
t.Log("== ExtractTarBz ==") t.Log("== ExtractTarBz ==")
err = Extract(TarBzip, testArchiveBaseDir+"/test.tar.bz2", ExtractOptions{ err = Extract(testArchiveBaseDir+"/test.tar.bz2", ExtractOptions{
Folder: testArchiveBaseDir + "/extracted/bzip", Folder: testArchiveBaseDir + "/extracted/bzip",
}) })
assert.NoError(t, err) assert.NoError(t, err)
t.Log("== ExtractTarGz ==") t.Log("== ExtractTarGz ==")
err = Extract(TarGzip, testArchiveBaseDir+"/test.tar.gz", ExtractOptions{ err = Extract(testArchiveBaseDir+"/test.tar.gz", ExtractOptions{
Folder: testArchiveBaseDir + "/extracted/gz", Folder: testArchiveBaseDir + "/extracted/gz",
}) })
assert.NoError(t, err) assert.NoError(t, err)
@ -127,7 +127,7 @@ func TestArchiveZip(t *testing.T) {
testGenerateZip(t) testGenerateZip(t)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := Open(Zip, testArchiveBaseDir+"/test.zip") archive, err := Open(testArchiveBaseDir + "/test.zip")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, archive.FileCount(), 10) assert.Equal(t, archive.FileCount(), 10)
@ -171,7 +171,7 @@ func TestArchiveTarGzip(t *testing.T) {
testGenerateTarGzip(t) testGenerateTarGzip(t)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := Open(TarGzip, testArchiveBaseDir+"/test.tar.gz") archive, err := Open(testArchiveBaseDir + "/test.tar.gz")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, archive.FileCount(), 10) assert.Equal(t, archive.FileCount(), 10)
@ -218,7 +218,7 @@ func TestArchiveTarBzip(t *testing.T) {
testGenerateTarBzip(t) testGenerateTarBzip(t)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := Open(TarBzip, testArchiveBaseDir+"/test.tar.bz2") archive, err := Open(testArchiveBaseDir + "/test.tar.bz2")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, archive.FileCount(), 10) assert.Equal(t, archive.FileCount(), 10)
@ -265,7 +265,7 @@ func TestArchiveTarXz(t *testing.T) {
testGenerateTarXz(t) testGenerateTarXz(t)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := Open(TarXz, testArchiveBaseDir+"/test.tar.xz") archive, err := Open(testArchiveBaseDir + "/test.tar.xz")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, archive.FileCount(), 10) assert.Equal(t, archive.FileCount(), 10)
@ -312,7 +312,7 @@ func TestArchiveTar(t *testing.T) {
testGenerateTar(t) testGenerateTar(t)
t.Log("== OpenArchive ==") t.Log("== OpenArchive ==")
archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") archive, err := Open(testArchiveBaseDir + "/test.tar")
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, 10, archive.FileCount()) assert.Equal(t, 10, archive.FileCount())

118
open.go
View file

@ -12,29 +12,50 @@ import (
"path/filepath" "path/filepath"
) )
// magicByteLookup maps each supported archive Type to the byte signatures
// ("magic bytes") used to recognise that format from the start of a file.
//
// openArchive compares the Tar signatures against the bytes at offset 257 of
// the file and every other signature against the bytes at offset 0. A type is
// considered detected as soon as any one of its signatures matches.
//
// NOTE(review): the compressed entries (TarGzip, TarXz, TarBzip) only identify
// the compression wrapper; a match here does not by itself show that the
// payload is a tar stream.
var magicByteLookup = map[Type][][]byte{
Tar: {
// ASCII "ustar" + NUL + "00" — presumably the POSIX ustar magic/version pair.
{0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30},
// ASCII "ustar" + two spaces + NUL — presumably the GNU tar variant.
{0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00},
},
TarGzip: {
// Two-byte gzip stream signature.
{0x1F, 0x8B},
},
TarXz: {
// 0xFD followed by ASCII "7zXZ" and a NUL byte.
{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00},
},
TarBzip: {
// ASCII "BZh".
{0x42, 0x5A, 0x68},
},
Zip: {
// ASCII "PK" followed by a two-byte record marker. Presumably: 03 04 is a
// local file header, 05 06 an end-of-central-directory record (empty
// archive), and 07 08 a spanned-archive marker — TODO confirm against the
// ZIP specification.
{0x50, 0x4B, 0x03, 0x04},
{0x50, 0x4B, 0x05, 0x06},
{0x50, 0x4B, 0x07, 0x08},
},
}
// OpenFS opens an archive file from a given fs.FS and returns an Archive struct. // OpenFS opens an archive file from a given fs.FS and returns an Archive struct.
// It takes fs.FS, Type and a path to the archive file as its parameters. // It takes fs.FS, and a path to the archive file as its parameters.
// The function returns an Archive struct and an error, if any. // The function returns an Archive struct and an error, if any.
func OpenFS(filesystem Filesystem, archiveType Type, path string) (*Archive, error) { func OpenFS(filesystem Filesystem, path string) (*Archive, error) {
return openArchive(filesystem, archiveType, path) return openArchive(filesystem, path)
} }
// Open opens an archive file and returns an Archive struct. // Open opens an archive file and returns an Archive struct.
// It takes a Type and a path to the archive file as its parameters. // It takes a path to the archive file as its parameter.
// The function returns an Archive struct and an error, if any. // The function returns an Archive struct and an error, if any.
func Open(archiveType Type, path string) (*Archive, error) { func Open(path string) (*Archive, error) {
archiveFolderPath, err := filepath.Abs(filepath.Dir(path)) archiveFolderPath, err := filepath.Abs(filepath.Dir(path))
if err != nil { if err != nil {
return nil, err return nil, err
} }
return openArchive(WrapPath(archiveFolderPath), archiveType, path) return openArchive(WrapPath(archiveFolderPath), path)
} }
// Extract opens an archive file and extracts the contents. // Extract opens an archive file and extracts the contents.
// It takes a Type, a path, and ExtractOptions for its parameters. // It takes a path, and ExtractOptions for its parameters.
// The function returns an error, if any. // The function returns an error, if any.
func Extract(archiveType Type, path string, options ExtractOptions) error { func Extract(path string, options ExtractOptions) error {
archiveFile, err := Open(archiveType, path) archiveFile, err := Open(path)
if err != nil { if err != nil {
return err return err
} }
@ -48,7 +69,7 @@ func Extract(archiveType Type, path string, options ExtractOptions) error {
return nil return nil
} }
func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive, error) { func openArchive(filesystem Filesystem, path string) (*Archive, error) {
archive := new(Archive) archive := new(Archive)
archive.files = make(map[string]*File) archive.files = make(map[string]*File)
@ -93,7 +114,80 @@ func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive
return nil, ErrArchiveFileNotFound return nil, ErrArchiveFileNotFound
} }
switch archiveType { magicBytes := make([]byte, 512)
if _, err := file.Read(magicBytes); err != nil {
return nil, err
}
if _, err := file.Seek(0, io.SeekStart); err != nil {
return nil, err
}
var foundArchiveType Type
for archiveT, fileBytes := range magicByteLookup {
if len(foundArchiveType) != 0 {
break
}
for _, b := range fileBytes {
var magicByteWithOffset []byte
if archiveT == Tar {
magicByteWithOffset = magicBytes[257 : 257+len(b)]
} else {
magicByteWithOffset = magicBytes[0:len(b)]
}
if bytes.Equal(b, magicByteWithOffset) {
foundArchiveType = archiveT
break
}
switch foundArchiveType {
case TarGzip:
gzipReader, err := gzip.NewReader(bytes.NewReader(b))
if err != nil {
return nil, err
}
fileMagicBytes := make([]byte, 512)
if _, err := gzipReader.Read(fileMagicBytes); err != nil {
return nil, err
}
if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) {
return nil, ErrArchiveTypeNotSupported
}
if err := gzipReader.Close(); err != nil {
return nil, err
}
case TarXz:
xzReader, err := xz.NewReader(bytes.NewReader(b))
if err != nil {
return nil, err
}
fileMagicBytes := make([]byte, 512)
if _, err := xzReader.Read(fileMagicBytes); err != nil {
return nil, err
}
if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) {
return nil, ErrArchiveTypeNotSupported
}
case TarBzip:
bzReader := bzip2.NewReader(bytes.NewReader(b))
fileMagicBytes := make([]byte, 512)
if _, err := bzReader.Read(fileMagicBytes); err != nil {
return nil, err
}
if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) {
return nil, ErrArchiveTypeNotSupported
}
}
}
}
if len(foundArchiveType) == 0 {
return nil, ErrArchiveTypeNotSupported
}
switch foundArchiveType {
case TarGzip: case TarGzip:
gzipReader, err := gzip.NewReader(file) gzipReader, err := gzip.NewReader(file)
if err != nil { if err != nil {
@ -198,7 +292,7 @@ func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive
return nil, ErrArchiveTypeNotSupported return nil, ErrArchiveTypeNotSupported
} }
archive.Type = archiveType archive.Type = foundArchiveType
archive.Path = path archive.Path = path
archive.archiveFile = file archive.archiveFile = file