author    Martin Ashby <martin@ashbysoft.com>  2023-09-15 23:36:00 +0100
committer Martin Ashby <martin@ashbysoft.com>  2023-09-15 23:36:00 +0100
commit    bce4a48af18f355d60de81eca1ae591dbbc8aa74 (patch)
tree      3e83381b2e5c401ff8f31060c4cdeecb8968f388 /src/main.zig
parent    3fb260c67ed2812f24cadb154fa10cfc5d1a84f8 (diff)
initial work on linear extraction, similar to the TAR file support
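
A minimal usage sketch of the streaming entry point added in this commit. The pipeToFileSystem signature and Options struct come from the diff below; the import path, archive name, and output directory are illustrative only:

    const std = @import("std");
    const zip = @import("main.zig"); // hypothetical import path; adjust to how the module is exposed

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        defer _ = gpa.deinit();

        var file = try std.fs.cwd().openFile("archive.zip", .{}); // illustrative archive path
        defer file.close();

        var out_dir = try std.fs.cwd().makeOpenPath("extracted", .{}); // illustrative output dir
        defer out_dir.close();

        // Forwards-only extraction: local file headers are read in sequence,
        // with no seek back to the central directory.
        try zip.pipeToFileSystem(out_dir, file.reader(), .{ .allocator = gpa.allocator() });
    }
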
Diffstat (limited to 'src/main.zig')
-rw-r--r--  src/main.zig  185
1 file changed, 125 insertions, 60 deletions
diff --git a/src/main.zig b/src/main.zig
index 1e006f6..cb116b9 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -66,20 +66,10 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
const eocdr_seek_start: usize = epos - eocdr_search_width;
try file_or_stream.seekTo(eocdr_seek_start);
var reader = file_or_stream.reader();
- const needle = @byteSwap(EndOfCentralDirectoryRecord.SIG);
- var window: u32 = try reader.readIntLittle(u32);
- while (true) {
- if (window == needle) {
- try file_or_stream.seekBy(-4);
- break;
- }
- const nb = try reader.readByte();
- window <<= 8;
- window |= nb;
- } else {
- return error.EndOfCentralDirectoryRecordNotFound;
- }
- var eocdr = try EndOfCentralDirectoryRecord.read(allocator, file_or_stream);
+ const sb = try read_to_sig(reader, EndOfCentralDirectoryRecord.SIG);
+ try file_or_stream.seekBy(sb);
+
+ var eocdr = try EndOfCentralDirectoryRecord.read(allocator, reader);
errdefer eocdr.deinit(allocator);
if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0) return error.SpansNotSupported;
if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk) return error.SpansNotSupported;
@@ -88,7 +78,7 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
errdefer allocator.free(central_directory_headers);
try file_or_stream.seekTo(eocdr.central_dir_offset);
for (0..eocdr.total_central_dir_entries) |i| {
- central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, file_or_stream);
+ central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, reader);
}
return .{
@@ -97,6 +87,23 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
.central_directory_headers = central_directory_headers,
};
}
+
+/// Returns how much to seekBy after the signature is found (because we'll have read past it).
+fn read_to_sig(reader: anytype, sig: u32) !i32 {
+ const needle = @byteSwap(sig);
+ var window: u32 = try reader.readIntLittle(u32);
+ while (true) {
+ if (window == needle) {
+ return -4;
+ }
+ const nb = try reader.readByte();
+ window <<= 8;
+ window |= nb;
+ } else {
+ return error.SignatureNotFound;
+ }
+}
+
pub fn deinit(self: *@This()) void {
self.end_of_central_directory_record.deinit(self.allocator);
for (0..self.central_directory_headers.len) |i| {
@@ -108,47 +115,19 @@ pub fn deinit(self: *@This()) void {
pub fn count_files(self: @This()) u16 {
return self.end_of_central_directory_record.total_central_dir_entries;
}
-pub fn file_name(self: @This(), index: u16) []const u8 {
+pub fn file_name(self: @This(), index: usize) []const u8 {
return self.central_directory_headers[index].file_name;
}
-pub fn file_comment(self: @This(), index: u16) []const u8 {
+pub fn file_comment(self: @This(), index: usize) []const u8 {
return self.central_directory_headers[index].file_comment;
}
-pub fn extract(self: @This(), index: u16, stream_or_file_in: anytype, stream_or_file_out: anytype) !void {
+pub fn extract(self: @This(), index: usize, stream_or_file_in: anytype, writer: anytype) !void {
const cdh = self.central_directory_headers[index];
try stream_or_file_in.seekTo(cdh.relative_offset_of_local_header);
- var lfh = try LocalFileHeader.read(self.allocator, stream_or_file_in);
- defer lfh.deinit(self.allocator);
- const is_encrypted = lfh.general_purpose_bit_flag.isSet(0);
- if (is_encrypted) return error.EncryptionNotSupported;
-
var reader = stream_or_file_in.reader();
- var lr = std.io.limitedReader(reader, lfh.compressed_size);
- var limited_reader = lr.reader();
- switch (lfh.compression_method) {
- .store => {
- var writer = stream_or_file_out.writer();
- try pump(limited_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- .deflate => {
- var decomp = try std.compress.deflate.decompressor(self.allocator, limited_reader, null);
- defer decomp.deinit();
- var decomp_reader = decomp.reader();
- var writer = stream_or_file_out.writer();
- try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- .lzma => {
- var decomp = try std.compress.lzma.decompress(self.allocator, limited_reader);
- defer decomp.deinit();
- var decomp_reader = decomp.reader();
- var writer = stream_or_file_out.writer();
- try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- else => {
- std.log.err("compression method {} not supported", .{lfh.compression_method});
- return error.CompressionMethodNotSupported;
- },
- }
+ var lfh = try LocalFileHeader.read(self.allocator, reader);
+ defer lfh.deinit(self.allocator);
+ return try lfh.extract(self.allocator, reader, writer);
}
fn pump(reader: anytype, writer: anytype, expected_size_written: usize, expected_crc32: u32) !void {
@@ -190,8 +169,8 @@ const CentralDirectoryHeader = struct {
extra_field: []const u8,
file_comment: []const u8,
- fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !CentralDirectoryHeader {
- return read2(allocator, stream_or_file, CentralDirectoryHeader, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !CentralDirectoryHeader {
+ return read2(allocator, reader, CentralDirectoryHeader, &[_]Dynamic{
.{ .field_name = "file_name", .length_field_name = "file_name_length" },
.{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
.{ .field_name = "file_comment", .length_field_name = "file_comment_length" },
@@ -216,8 +195,8 @@ const EndOfCentralDirectoryRecord = struct {
comment_length: u16,
comment: []const u8,
- fn read(allocator: std.mem.Allocator, file_or_stream: anytype) !EndOfCentralDirectoryRecord {
- return read2(allocator, file_or_stream, EndOfCentralDirectoryRecord, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !EndOfCentralDirectoryRecord {
+ return read2(allocator, reader, EndOfCentralDirectoryRecord, &[_]Dynamic{
.{ .field_name = "comment", .length_field_name = "comment_length" },
});
}
@@ -242,8 +221,8 @@ const LocalFileHeader = struct {
file_name: []const u8,
extra_field: []const u8,
- fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !LocalFileHeader {
- return read2(allocator, stream_or_file, LocalFileHeader, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !LocalFileHeader {
+ return read2(allocator, reader, LocalFileHeader, &[_]Dynamic{
.{ .field_name = "file_name", .length_field_name = "file_name_length" },
.{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
});
@@ -253,6 +232,47 @@ const LocalFileHeader = struct {
allocator.free(self.file_name);
allocator.free(self.extra_field);
}
+
+ fn is_dir(self: *LocalFileHeader) bool {
+ return std.mem.endsWith(u8, self.file_name, "/"); // Directory entries end with '/'; this matches the Java stdlib's behaviour
+ }
+
+ fn extract(self: *LocalFileHeader, allocator: std.mem.Allocator, reader: anytype, writer: anytype) !void {
+ const is_encrypted = self.general_purpose_bit_flag.isSet(0);
+ if (is_encrypted) return error.EncryptionNotSupported;
+
+ if (self.is_dir()) {
+ if (self.compressed_size != 0) {
+ // Directories can't have content, so a non-zero compressed size is almost certainly wrong.
+ return error.InvalidFileName;
+ }
+ return; // Nothing to extract; if we knew we were writing to the filesystem we could create an empty directory here.
+ }
+
+ var lr = std.io.limitedReader(reader, self.compressed_size);
+ var limited_reader = lr.reader();
+ switch (self.compression_method) {
+ .store => {
+ try pump(limited_reader, writer, self.uncompressed_size, self.crc32);
+ },
+ .deflate => {
+ var decomp = try std.compress.deflate.decompressor(allocator, limited_reader, null);
+ defer decomp.deinit();
+ var decomp_reader = decomp.reader();
+ try pump(decomp_reader, writer, self.uncompressed_size, self.crc32);
+ },
+ .lzma => {
+ var decomp = try std.compress.lzma.decompress(allocator, limited_reader);
+ defer decomp.deinit();
+ var decomp_reader = decomp.reader();
+ try pump(decomp_reader, writer, self.uncompressed_size, self.crc32);
+ },
+ else => {
+ std.log.err("compression method {} not supported", .{self.compression_method});
+ return error.CompressionMethodNotSupported;
+ },
+ }
+ }
};
const Dynamic = struct {
@@ -262,7 +282,7 @@ const Dynamic = struct {
fn read2(
allocator: std.mem.Allocator,
- stream_or_file: anytype,
+ reader: anytype,
comptime T: type,
comptime dynamics: []const Dynamic,
) !T {
@@ -271,7 +291,6 @@ fn read2(
if (ti != .Struct) @compileError("read2 expects type parameter T to be a struct, but it was a " ++ @typeName(T));
const si = ti.Struct;
- var reader = stream_or_file.reader();
const sig_actual = try reader.readIntLittle(u32);
if (sig_actual != T.SIG) {
std.log.err("invalid signature expected {x} got {x}", .{ T.SIG, sig_actual });
@@ -342,10 +361,11 @@ test "extract stored" {
defer zf.deinit();
var out = [_]u8{0} ** 1024;
var fbs_out = std.io.fixedBufferStream(&out);
- try zf.extract(0, &fbs, &fbs_out);
+ var writer = fbs_out.writer();
+ try zf.extract(0, &fbs, writer);
try std.testing.expectEqualStrings("Hello, Zip!", fbs_out.getWritten());
fbs_out.reset();
- try zf.extract(1, &fbs, &fbs_out);
+ try zf.extract(1, &fbs, writer);
try std.testing.expectEqualStrings("hi there\n", fbs_out.getWritten());
}
@@ -356,8 +376,53 @@ test "extract deflate" {
defer zf.deinit();
var out = [_]u8{0} ** 1024;
var fbs_out = std.io.fixedBufferStream(&out);
+ var writer = fbs_out.writer();
try std.testing.expectEqualStrings("Here is a comment :)", zf.file_comment(0));
try std.testing.expectEqual(@This().CompressionMethod.deflate, zf.central_directory_headers[0].compression_method);
- try zf.extract(0, &fbs, &fbs_out);
+ try zf.extract(0, &fbs, writer);
try std.testing.expectEqualStrings(@embedFile("foo.txt"), fbs_out.getWritten());
}
+test "subdir" {
+ const test_zip = @embedFile("subfolder.zip");
+ var fbs = std.io.fixedBufferStream(test_zip);
+ var zf = try @This().from(std.testing.allocator, &fbs);
+ defer zf.deinit();
+ for (0..zf.count_files()) |i| {
+ _ = i;
+ // std.log.err("cdh {any}", .{zf.central_directory_headers[i]});
+ }
+}
+pub const Options = struct {
+ allocator: std.mem.Allocator,
+};
+// Roughly compatible with tar.zig's pipeToFileSystem; like tar extraction, this is a forwards-only read of the archive.
+pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
+ const allocator = options.allocator;
+ // var peek_stream = std.io.peekStream(4, reader);
+ // var peek_reader = peek_stream.reader();
+ while (true) {
+ var lfh = try LocalFileHeader.read(allocator, reader);
+ defer lfh.deinit(allocator);
+ if (std.fs.path.dirname(lfh.file_name)) |dn| {
+ try dir.makePath(dn);
+ }
+ if (!lfh.is_dir()) {
+ var f = try dir.createFile(lfh.file_name, .{});
+ defer f.close();
+ var writer = f.writer();
+ try lfh.extract(allocator, reader, writer);
+ }
+ try reader.skipBytes(12, .{}); // Skip the data descriptor (crc32, compressed size, uncompressed size).
+ // _ = read_to_sig(peek_reader, LocalFileHeader.SIG) catch |e| switch (e) {
+ // error.EndOfStream => return,
+ // else => return e,
+ // };
+ // try peek_stream.putBack(&@as([4]u8, @bitCast(LocalFileHeader.SIG)));
+ }
+}
+
+test "filesystem" {
+ var f = try std.fs.cwd().openFile("src/subfolder.zip", .{});
+ defer f.close();
+ try pipeToFileSystem(try std.fs.cwd().makeOpenPath("test", .{}), f.reader(), .{ .allocator = std.testing.allocator });
+}