Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@

# Don't mess with .ar files
*.ar binary
*.deb binary
*.deb binary
*.arj binary
*.arc binary
*.ace binary
9 changes: 9 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ RecursiveExtractor is a cross-platform .NET library and CLI tool for parsing arc

## Building and Testing

### Git Clone Depth

⚠️ **Important**: This repository uses [Nerdbank.GitVersioning](https://github.com/dotnet/Nerdbank.GitVersioning) (NBGV) to calculate version numbers from git history. Shallow clones will cause the build to fail with a `GitException: Shallow clone lacks the objects required to calculate version height` error. If you encounter this, deepen the clone:
```bash
git fetch --unshallow
# or if that fails:
git fetch --depth=100
```

### Build Commands
```bash
# Build the entire solution
Expand Down
26 changes: 19 additions & 7 deletions RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public static TheoryData<string, int> ArchiveData
{ "TestDataArchivesNested.Zip", 54 },
{ "UdfTest.iso", 3 },
{ "UdfTestWithMultiSystem.iso", 3 },
{ "TestData.arj", 1 },
{ "TestData.arc", 1 },
{ "TestData.ace", 1 },
{ "NestedFormatsTest.zip", 2 },
// { "HfsSampleUDCO.dmg", 2 }
};
}
Expand Down Expand Up @@ -75,6 +79,10 @@ public static TheoryData<string, int> NoRecursionData
{ "EmptyFile.txt", 1 },
{ "TestDataArchivesNested.Zip", 14 },
{ "UdfTestWithMultiSystem.iso", 3 },
{ "TestData.arj", 1 },
{ "TestData.arc", 1 },
{ "TestData.ace", 1 },
{ "NestedFormatsTest.zip", 1 },
// { "HfsSampleUDCO.dmg", 2 }
};
}
Expand Down Expand Up @@ -193,8 +201,7 @@ public void ExtractArchiveParallel(string fileName, int expectedNumFiles)
var extractor = new Extractor();
var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName);
var results = extractor.Extract(path, GetExtractorOptions(true)).ToList();
var names = results.Select(x => x.FullPath);
var stringOfNames = string.Join("\n", names);
Assert.DoesNotContain(results, r => r.EntryStatus == FileEntryStatus.FailedArchive);
Assert.Equal(expectedNumFiles, results.Count);
}

Expand Down Expand Up @@ -223,17 +230,22 @@ public async Task ExtractArchiveAsync(string fileName, int expectedNumFiles)
[MemberData(nameof(ArchiveData))]
public async Task ExtractArchiveFromStreamAsync(string fileName, int expectedNumFiles)
{
    // Extracts the named test archive from an already-open stream via the async API and
    // checks that (a) no yielded entry is flagged FailedArchive and (b) the number of
    // yielded entries equals expectedNumFiles.
    var extractor = new Extractor();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName);

    // The using declaration disposes the stream when the method exits, so no explicit
    // Close() call is needed at the end.
    using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);

    var numFiles = 0;
    var numFailed = 0;
    await foreach (var result in extractor.ExtractAsync(path, stream, new ExtractorOptions()))
    {
        numFiles++;
        if (result.EntryStatus == FileEntryStatus.FailedArchive)
        {
            numFailed++;
        }
    }

    // Check for failures first so a parsing failure is reported as such rather than as a
    // bare count mismatch.
    Assert.Equal(0, numFailed);
    Assert.Equal(expectedNumFiles, numFiles);
}

[Theory]
Expand All @@ -243,9 +255,9 @@ public void ExtractArchiveFromStream(string fileName, int expectedNumFiles)
var extractor = new Extractor();
var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName);
using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
var results = extractor.Extract(path, stream, GetExtractorOptions());
Assert.Equal(expectedNumFiles, results.Count());
stream.Close();
var resultsList = extractor.Extract(path, stream, GetExtractorOptions()).ToList();
Assert.DoesNotContain(resultsList, r => r.EntryStatus == FileEntryStatus.FailedArchive);
Assert.Equal(expectedNumFiles, resultsList.Count);
}

[Theory]
Expand Down
3 changes: 3 additions & 0 deletions RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ public class MiniMagicTests
[InlineData("Empty.vmdk", ArchiveFileType.VMDK)]
[InlineData("HfsSampleUDCO.dmg", ArchiveFileType.DMG)]
[InlineData("EmptyFile.txt", ArchiveFileType.UNKNOWN)]
[InlineData("TestData.arj", ArchiveFileType.ARJ)]
[InlineData("TestData.arc", ArchiveFileType.ARC)]
[InlineData("TestData.ace", ArchiveFileType.ACE)]
public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType)
{
var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.

using Microsoft.CST.RecursiveExtractor;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Xunit;

namespace RecursiveExtractor.Tests.ExtractorTests
{
    /// <summary>
    /// Tests that validate content integrity through deeply nested extraction
    /// across multiple new archive formats (ZIP → ACE → ARC → ARJ → TAR).
    /// </summary>
    public class NestedFormatsContentTests
    {
        private const string NestedArchiveFileName = "NestedFormatsTest.zip";
        private const string File1ExpectedContent = "Hello from File1. This is a test file for nested archive extraction.\n";
        private const string File2ExpectedContent = "Greetings from File2. This verifies content integrity after recursive extraction.\n";

        /// <summary>
        /// Number of entries the tests expect a full recursive extraction to yield.
        /// </summary>
        private const int ExpectedFileCount = 2;

        /// <summary>
        /// Builds the path of the nested test archive under the current working directory.
        /// </summary>
        private static string GetArchivePath() =>
            Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", NestedArchiveFileName);

        /// <summary>
        /// Extracts the nested archive with the synchronous API and verifies the extracted contents.
        /// </summary>
        [Fact]
        public void ExtractNestedFormatsContentSync()
        {
            var extractor = new Extractor();

            var results = extractor.Extract(GetArchivePath(), new ExtractorOptions()).ToList();

            AssertExpectedContents(results);
        }

        /// <summary>
        /// Extracts the nested archive with the asynchronous API and verifies the extracted contents.
        /// </summary>
        [Fact]
        public async Task ExtractNestedFormatsContentAsync()
        {
            var extractor = new Extractor();

            var results = await extractor.ExtractAsync(GetArchivePath(), new ExtractorOptions()).ToListAsync();

            AssertExpectedContents(results);
        }

        /// <summary>
        /// Shared verification for both extraction paths: checks the entry count, that no entry
        /// is flagged <see cref="FileEntryStatus.FailedArchive"/>, and that both expected files
        /// carry exactly the expected text.
        /// </summary>
        /// <param name="results">The fully materialized extraction results.</param>
        private static void AssertExpectedContents(IReadOnlyCollection<FileEntry> results)
        {
            Assert.Equal(ExpectedFileCount, results.Count);
            Assert.DoesNotContain(results, r => r.EntryStatus == FileEntryStatus.FailedArchive);

            AssertEntryContent(results, "file1.txt", File1ExpectedContent);
            AssertEntryContent(results, "file2.txt", File2ExpectedContent);
        }

        /// <summary>
        /// Finds the first entry whose full path ends with <paramref name="fileName"/> and asserts
        /// that its content, read as text, equals <paramref name="expectedContent"/>.
        /// </summary>
        /// <param name="results">The extraction results to search.</param>
        /// <param name="fileName">The path suffix identifying the entry.</param>
        /// <param name="expectedContent">The exact text the entry must contain.</param>
        private static void AssertEntryContent(IEnumerable<FileEntry> results, string fileName, string expectedContent)
        {
            // Ordinal comparison: matching a path suffix must not depend on the current culture.
            var entry = results.FirstOrDefault(r => r.FullPath.EndsWith(fileName, StringComparison.Ordinal));
            Assert.NotNull(entry);

            using var reader = new StreamReader(entry.Content);
            Assert.Equal(expectedContent, reader.ReadToEnd());
        }
    }
}
12 changes: 12 additions & 0 deletions RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,18 @@
<None Update="TestData\TestDataArchives\UdfTestWithMultiSystem.iso">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TestDataArchives\TestData.arj">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TestDataArchives\TestData.arc">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TestDataArchives\TestData.ace">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TestDataArchives\NestedFormatsTest.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Bombs\zoneinfo-2010g.tar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions RecursiveExtractor/Extractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ public void SetDefaultExtractors()
SetExtractor(ArchiveFileType.VMDK, new VmdkExtractor(this));
SetExtractor(ArchiveFileType.XZ, new XzExtractor(this));
SetExtractor(ArchiveFileType.ZIP, new ZipExtractor(this));
SetExtractor(ArchiveFileType.ARJ, new ArjExtractor(this));
SetExtractor(ArchiveFileType.ARC, new ArcExtractor(this));
SetExtractor(ArchiveFileType.ACE, new AceExtractor(this));
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
SetExtractor(ArchiveFileType.WIM, new WimExtractor(this));
Expand Down
175 changes: 175 additions & 0 deletions RecursiveExtractor/Extractors/AceExtractor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
using SharpCompress.Readers;
using SharpCompress.Readers.Ace;
using System;
using System.Collections.Generic;
using System.IO;

namespace Microsoft.CST.RecursiveExtractor.Extractors
{
    /// <summary>
    /// The ACE Archive extractor implementation
    /// </summary>
    public class AceExtractor : AsyncExtractorInterface
    {
        /// <summary>
        /// The constructor takes the Extractor context for recursion.
        /// </summary>
        /// <param name="context">The Extractor context.</param>
        public AceExtractor(Extractor context)
        {
            Context = context;
        }
        private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

        internal Extractor Context { get; }

        /// <summary>
        /// Extracts an ACE archive
        /// </summary>
        ///<inheritdoc />
        public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
        {
            var aceReader = TryOpenReader(fileEntry);
            if (aceReader is null)
            {
                // The archive could not be opened; optionally hand the raw file back, flagged as failed.
                if (options.ExtractSelfOnFail)
                {
                    fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                    yield return fileEntry;
                }
                yield break;
            }

            using (aceReader)
            {
                while (aceReader.MoveToNextEntry())
                {
                    var entry = aceReader.Entry;
                    if (entry.IsDirectory)
                    {
                        continue;
                    }

                    var name = ResolveEntryName(entry.Key, fileEntry);
                    if (name is null)
                    {
                        continue;
                    }

                    // Deliberately outside the try/catch below: an exception raised by the
                    // governor must reach the caller instead of being logged and skipped.
                    governor.CheckResourceGovernor(entry.Size);

                    FileEntry? newFileEntry = null;
                    try
                    {
                        // Mirrors the synchronous path, which disposes the entry stream before
                        // recursing. NOTE(review): this relies on FromStreamAsync buffering the
                        // entry content into the new FileEntry - confirm.
                        using (var entryStream = aceReader.OpenEntryStream())
                        {
                            newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false);
                        }
                    }
                    catch (Exception e)
                    {
                        // Same policy as the synchronous path: a single unreadable entry is
                        // logged and skipped rather than aborting the whole enumeration.
                        Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, entry.Key, e.GetType());
                    }

                    if (newFileEntry is null)
                    {
                        continue;
                    }

                    if (options.Recurse || topLevel)
                    {
                        // Hand the entry back to the context so it is processed in turn.
                        await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
                        {
                            yield return innerEntry;
                        }
                    }
                    else
                    {
                        yield return newFileEntry;
                    }
                }
            }
        }

        /// <summary>
        /// Extracts an ACE archive
        /// </summary>
        ///<inheritdoc />
        public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
        {
            var aceReader = TryOpenReader(fileEntry);
            if (aceReader is null)
            {
                // The archive could not be opened; optionally hand the raw file back, flagged as failed.
                if (options.ExtractSelfOnFail)
                {
                    fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                    yield return fileEntry;
                }
                yield break;
            }

            using (aceReader)
            {
                while (aceReader.MoveToNextEntry())
                {
                    var entry = aceReader.Entry;
                    if (entry.IsDirectory)
                    {
                        continue;
                    }

                    var name = ResolveEntryName(entry.Key, fileEntry);
                    if (name is null)
                    {
                        continue;
                    }

                    // Deliberately outside the try/catch below: previously this call sat inside
                    // it, so any exception raised by the governor was logged at debug level and
                    // swallowed, and extraction carried on with the next entry.
                    governor.CheckResourceGovernor(entry.Size);

                    FileEntry? newFileEntry = null;
                    try
                    {
                        using (var stream = aceReader.OpenEntryStream())
                        {
                            newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff);
                        }
                    }
                    catch (Exception e)
                    {
                        // A single unreadable entry is logged and skipped.
                        Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, entry.Key, e.GetType());
                    }

                    if (newFileEntry is null)
                    {
                        continue;
                    }

                    if (options.Recurse || topLevel)
                    {
                        // Hand the entry back to the context so it is processed in turn.
                        foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))
                        {
                            yield return innerEntry;
                        }
                    }
                    else
                    {
                        yield return newFileEntry;
                    }
                }
            }
        }

        /// <summary>
        /// Rewinds the content stream of <paramref name="fileEntry"/> and tries to open it as an ACE archive.
        /// </summary>
        /// <param name="fileEntry">The entry whose content should be parsed.</param>
        /// <returns>The opened reader, or <c>null</c> when opening threw (the failure is logged at debug level).</returns>
        private AceReader? TryOpenReader(FileEntry fileEntry)
        {
            try
            {
                fileEntry.Content.Position = 0;
                return AceReader.Open(fileEntry.Content, new ReaderOptions()
                {
                    // The content stream belongs to the FileEntry, so the reader is asked
                    // not to close it.
                    LeaveStreamOpen = true
                });
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, string.Empty, e.GetType());
                return null;
            }
        }

        /// <summary>
        /// Converts an archive entry key into an entry name using the platform directory separator.
        /// </summary>
        /// <param name="key">The raw entry key reported by the reader; may be null.</param>
        /// <param name="fileEntry">The containing archive, used only for the log message.</param>
        /// <returns>The normalized name, or <c>null</c> when the key is null or empty (logged at debug level).</returns>
        private string? ResolveEntryName(string? key, FileEntry fileEntry)
        {
            var name = key?.Replace('/', Path.DirectorySeparatorChar);
            if (string.IsNullOrEmpty(name))
            {
                Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath);
                return null;
            }
            return name;
        }
    }
}
Loading