diff --git a/README.md b/README.md index 34f920d..f686a5f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A golang library for extracting and creating archives. [Documentation Link](https://pkg.go.dev/egtyl.xyz/omnibill/archiver) ## Roadmap -- [ ] Automatically detect the archive format +- [x] Automatically detect the archive format - [x] Add mutexes to work with concurrency ## Supported Formats diff --git a/archive_fs_test.go b/archive_fs_test.go index 7e5c74b..dc6f77b 100644 --- a/archive_fs_test.go +++ b/archive_fs_test.go @@ -47,7 +47,7 @@ func TestArchiveFS(t *testing.T) { assert.NoError(t, err) t.Log("== OpenArchive ==") - archive, err := OpenFS(WrapBillyFS(memoryFS), TarGzip, "archive.tar.gz") + archive, err := OpenFS(WrapBillyFS(memoryFS), "archive.tar.gz") assert.NoError(t, err) assert.Equal(t, 10, archive.FileCount()) @@ -93,7 +93,7 @@ func TestExtractArchiveBillyFS(t *testing.T) { assert.NoError(t, err) testGenerateTar(t) - archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") + archive, err := Open(testArchiveBaseDir + "/test.tar") assert.NoError(t, err) memoryFS := memfs.New() diff --git a/archive_test.go b/archive_test.go index 8cf5a25..293f311 100644 --- a/archive_test.go +++ b/archive_test.go @@ -26,7 +26,7 @@ func TestArchiveConcurrency(t *testing.T) { testGenerateZip(t) - archive, err := Open(Zip, testArchiveBaseDir+"/test.zip") + archive, err := Open(testArchiveBaseDir + "/test.zip") assert.NoError(t, err) var wg sync.WaitGroup @@ -87,31 +87,31 @@ func TestArchiveExtract(t *testing.T) { TestArchiveTarGzip(t) t.Log("== ExtractZip ==") - err = Extract(Zip, testArchiveBaseDir+"/test.zip", ExtractOptions{ + err = Extract(testArchiveBaseDir+"/test.zip", ExtractOptions{ Folder: testArchiveBaseDir + "/extracted/zip", }) assert.NoError(t, err) t.Log("== ExtractTar ==") - err = Extract(Tar, testArchiveBaseDir+"/test.tar", ExtractOptions{ + err = Extract(testArchiveBaseDir+"/test.tar", ExtractOptions{ Folder: testArchiveBaseDir + 
"/extracted/tar", }) assert.NoError(t, err) t.Log("== ExtractTarXz ==") - err = Extract(TarXz, testArchiveBaseDir+"/test.tar.xz", ExtractOptions{ + err = Extract(testArchiveBaseDir+"/test.tar.xz", ExtractOptions{ Folder: testArchiveBaseDir + "/extracted/xz", }) assert.NoError(t, err) t.Log("== ExtractTarBz ==") - err = Extract(TarBzip, testArchiveBaseDir+"/test.tar.bz2", ExtractOptions{ + err = Extract(testArchiveBaseDir+"/test.tar.bz2", ExtractOptions{ Folder: testArchiveBaseDir + "/extracted/bzip", }) assert.NoError(t, err) t.Log("== ExtractTarGz ==") - err = Extract(TarGzip, testArchiveBaseDir+"/test.tar.gz", ExtractOptions{ + err = Extract(testArchiveBaseDir+"/test.tar.gz", ExtractOptions{ Folder: testArchiveBaseDir + "/extracted/gz", }) assert.NoError(t, err) @@ -127,7 +127,7 @@ func TestArchiveZip(t *testing.T) { testGenerateZip(t) t.Log("== OpenArchive ==") - archive, err := Open(Zip, testArchiveBaseDir+"/test.zip") + archive, err := Open(testArchiveBaseDir + "/test.zip") assert.NoError(t, err) assert.Equal(t, archive.FileCount(), 10) @@ -171,7 +171,7 @@ func TestArchiveTarGzip(t *testing.T) { testGenerateTarGzip(t) t.Log("== OpenArchive ==") - archive, err := Open(TarGzip, testArchiveBaseDir+"/test.tar.gz") + archive, err := Open(testArchiveBaseDir + "/test.tar.gz") assert.NoError(t, err) assert.Equal(t, archive.FileCount(), 10) @@ -218,7 +218,7 @@ func TestArchiveTarBzip(t *testing.T) { testGenerateTarBzip(t) t.Log("== OpenArchive ==") - archive, err := Open(TarBzip, testArchiveBaseDir+"/test.tar.bz2") + archive, err := Open(testArchiveBaseDir + "/test.tar.bz2") assert.NoError(t, err) assert.Equal(t, archive.FileCount(), 10) @@ -265,7 +265,7 @@ func TestArchiveTarXz(t *testing.T) { testGenerateTarXz(t) t.Log("== OpenArchive ==") - archive, err := Open(TarXz, testArchiveBaseDir+"/test.tar.xz") + archive, err := Open(testArchiveBaseDir + "/test.tar.xz") assert.NoError(t, err) assert.Equal(t, archive.FileCount(), 10) @@ -312,7 +312,7 @@ func TestArchiveTar(t 
*testing.T) { testGenerateTar(t) t.Log("== OpenArchive ==") - archive, err := Open(Tar, testArchiveBaseDir+"/test.tar") + archive, err := Open(testArchiveBaseDir + "/test.tar") assert.NoError(t, err) assert.Equal(t, 10, archive.FileCount()) diff --git a/open.go b/open.go index c119c24..4953d73 100644 --- a/open.go +++ b/open.go @@ -12,29 +12,50 @@ import ( "path/filepath" ) +var magicByteLookup = map[Type][][]byte{ + Tar: { + {0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30}, + {0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00}, + }, + TarGzip: { + {0x1F, 0x8B}, + }, + TarXz: { + {0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, + }, + TarBzip: { + {0x42, 0x5A, 0x68}, + }, + Zip: { + {0x50, 0x4B, 0x03, 0x04}, + {0x50, 0x4B, 0x05, 0x06}, + {0x50, 0x4B, 0x07, 0x08}, + }, +} + // OpenFS opens an archive file from a given fs.FS and returns an Archive struct. -// It takes fs.FS, Type and a path to the archive file as its parameters. +// It takes fs.FS and a path to the archive file as its parameters. // The function returns an Archive struct and an error, if any. -func OpenFS(filesystem Filesystem, archiveType Type, path string) (*Archive, error) { - return openArchive(filesystem, archiveType, path) +func OpenFS(filesystem Filesystem, path string) (*Archive, error) { + return openArchive(filesystem, path) } // Open opens an archive file and returns an Archive struct. -// It takes a Type and a path to the archive file as its parameters. +// It takes a path to the archive file as its parameter. // The function returns an Archive struct and an error, if any. -func Open(archiveType Type, path string) (*Archive, error) { +func Open(path string) (*Archive, error) { archiveFolderPath, err := filepath.Abs(filepath.Dir(path)) if err != nil { return nil, err } - return openArchive(WrapPath(archiveFolderPath), archiveType, path) + return openArchive(WrapPath(archiveFolderPath), path) } // Extract opens an archive file and extracts the contents. 
-// It takes a Type, a path, and ExtractOptions for its parameters. +// It takes a path and ExtractOptions for its parameters. // The function returns an error, if any. -func Extract(archiveType Type, path string, options ExtractOptions) error { - archiveFile, err := Open(archiveType, path) +func Extract(path string, options ExtractOptions) error { + archiveFile, err := Open(path) if err != nil { return err } @@ -48,7 +69,7 @@ func Extract(archiveType Type, path string, options ExtractOptions) error { return nil } -func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive, error) { +func openArchive(filesystem Filesystem, path string) (*Archive, error) { archive := new(Archive) archive.files = make(map[string]*File) @@ -93,7 +114,80 @@ func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive return nil, ErrArchiveFileNotFound } - switch archiveType { + magicBytes := make([]byte, 512) + if _, err := file.Read(magicBytes); err != nil { + return nil, err + } + + if _, err := file.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + var foundArchiveType Type + + for archiveT, fileBytes := range magicByteLookup { + if len(foundArchiveType) != 0 { + break + } + for _, b := range fileBytes { + var magicByteWithOffset []byte + if archiveT == Tar { + magicByteWithOffset = magicBytes[257 : 257+len(b)] + } else { + magicByteWithOffset = magicBytes[0:len(b)] + } + if bytes.Equal(b, magicByteWithOffset) { + foundArchiveType = archiveT + break + } + + switch foundArchiveType { + case TarGzip: + gzipReader, err := gzip.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + fileMagicBytes := make([]byte, 512) + if _, err := gzipReader.Read(fileMagicBytes); err != nil { + return nil, err + } + if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) { + return nil, ErrArchiveTypeNotSupported + } + if err := gzipReader.Close(); err != nil { + return nil, err + } + case TarXz: + xzReader, err := 
xz.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + fileMagicBytes := make([]byte, 512) + if _, err := xzReader.Read(fileMagicBytes); err != nil { + return nil, err + } + if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) { + return nil, ErrArchiveTypeNotSupported + } + case TarBzip: + bzReader := bzip2.NewReader(bytes.NewReader(b)) + + fileMagicBytes := make([]byte, 512) + if _, err := bzReader.Read(fileMagicBytes); err != nil { + return nil, err + } + if !bytes.Equal(b, fileMagicBytes[257:257+len(b)]) { + return nil, ErrArchiveTypeNotSupported + } + } + } + } + + if len(foundArchiveType) == 0 { + return nil, ErrArchiveTypeNotSupported + } + + switch foundArchiveType { case TarGzip: gzipReader, err := gzip.NewReader(file) if err != nil { @@ -198,7 +292,7 @@ func openArchive(filesystem Filesystem, archiveType Type, path string) (*Archive return nil, ErrArchiveTypeNotSupported } - archive.Type = archiveType + archive.Type = foundArchiveType archive.Path = path archive.archiveFile = file