summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Ashby <martin@ashbysoft.com>2023-09-15 23:36:00 +0100
committerMartin Ashby <martin@ashbysoft.com>2023-09-15 23:36:00 +0100
commitbce4a48af18f355d60de81eca1ae591dbbc8aa74 (patch)
tree3e83381b2e5c401ff8f31060c4cdeecb8968f388
parent3fb260c67ed2812f24cadb154fa10cfc5d1a84f8 (diff)
downloadzip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.gz
zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.bz2
zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.xz
zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.zip
initial work on linear extraction like TAR file support
-rw-r--r--src/foo/foo.txt1
-rw-r--r--src/main.zig185
-rw-r--r--src/subfolder.zipbin0 -> 573 bytes
-rw-r--r--test/foo/foo.txt1
4 files changed, 127 insertions, 60 deletions
diff --git a/src/foo/foo.txt b/src/foo/foo.txt
new file mode 100644
index 0000000..decec2d
--- /dev/null
+++ b/src/foo/foo.txt
@@ -0,0 +1 @@
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file
diff --git a/src/main.zig b/src/main.zig
index 1e006f6..cb116b9 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -66,20 +66,10 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
const eocdr_seek_start: usize = epos - eocdr_search_width;
try file_or_stream.seekTo(eocdr_seek_start);
var reader = file_or_stream.reader();
- const needle = @byteSwap(EndOfCentralDirectoryRecord.SIG);
- var window: u32 = try reader.readIntLittle(u32);
- while (true) {
- if (window == needle) {
- try file_or_stream.seekBy(-4);
- break;
- }
- const nb = try reader.readByte();
- window <<= 8;
- window |= nb;
- } else {
- return error.EndOfCentralDirectoryRecordNotFound;
- }
- var eocdr = try EndOfCentralDirectoryRecord.read(allocator, file_or_stream);
+ const sb = try read_to_sig(reader, EndOfCentralDirectoryRecord.SIG);
+ try file_or_stream.seekBy(sb);
+
+ var eocdr = try EndOfCentralDirectoryRecord.read(allocator, reader);
errdefer eocdr.deinit(allocator);
if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0) return error.SpansNotSupported;
if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk) return error.SpansNotSupported;
@@ -88,7 +78,7 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
errdefer allocator.free(central_directory_headers);
try file_or_stream.seekTo(eocdr.central_dir_offset);
for (0..eocdr.total_central_dir_entries) |i| {
- central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, file_or_stream);
+ central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, reader);
}
return .{
@@ -97,6 +87,23 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
.central_directory_headers = central_directory_headers,
};
}
+
+/// Reads `reader` one byte at a time until the four most recently read bytes equal `sig`
+/// as it is laid out on disk (little-endian).
+/// Returns how much to seekBy after the signature is found (because we'll now have read over it):
+/// always -4, which lands on the first byte of the signature.
+/// Reader errors (for example running out of input before a match) are propagated unchanged.
+fn read_to_sig(reader: anytype, sig: u32) !i32 {
+    // `window` keeps the last four bytes with the oldest byte in the most significant
+    // position, so the value to look for is the byte-swapped signature.
+    const needle = @byteSwap(sig);
+    // Prime the window big-endian so it uses the same byte order as the shifting below.
+    // Priming it little-endian reversed the first four bytes, which hid any signature
+    // that started inside them.
+    var window: u32 = try reader.readIntBig(u32);
+    while (true) {
+        if (window == needle) {
+            return -4;
+        }
+        const nb = try reader.readByte();
+        window <<= 8;
+        window |= nb;
+    } else {
+        // The loop condition is the literal `true`, so this branch does not run; it is kept
+        // so the function's error set still contains SignatureNotFound.
+        return error.SignatureNotFound;
+    }
+}
+
pub fn deinit(self: *@This()) void {
self.end_of_central_directory_record.deinit(self.allocator);
for (0..self.central_directory_headers.len) |i| {
@@ -108,47 +115,19 @@ pub fn deinit(self: *@This()) void {
pub fn count_files(self: @This()) u16 {
return self.end_of_central_directory_record.total_central_dir_entries;
}
-pub fn file_name(self: @This(), index: u16) []const u8 {
+pub fn file_name(self: @This(), index: usize) []const u8 {
return self.central_directory_headers[index].file_name;
}
-pub fn file_comment(self: @This(), index: u16) []const u8 {
+pub fn file_comment(self: @This(), index: usize) []const u8 {
return self.central_directory_headers[index].file_comment;
}
-pub fn extract(self: @This(), index: u16, stream_or_file_in: anytype, stream_or_file_out: anytype) !void {
+pub fn extract(self: @This(), index: usize, stream_or_file_in: anytype, writer: anytype) !void {
const cdh = self.central_directory_headers[index];
try stream_or_file_in.seekTo(cdh.relative_offset_of_local_header);
- var lfh = try LocalFileHeader.read(self.allocator, stream_or_file_in);
- defer lfh.deinit(self.allocator);
- const is_encrypted = lfh.general_purpose_bit_flag.isSet(0);
- if (is_encrypted) return error.EncryptionNotSupported;
-
var reader = stream_or_file_in.reader();
- var lr = std.io.limitedReader(reader, lfh.compressed_size);
- var limited_reader = lr.reader();
- switch (lfh.compression_method) {
- .store => {
- var writer = stream_or_file_out.writer();
- try pump(limited_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- .deflate => {
- var decomp = try std.compress.deflate.decompressor(self.allocator, limited_reader, null);
- defer decomp.deinit();
- var decomp_reader = decomp.reader();
- var writer = stream_or_file_out.writer();
- try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- .lzma => {
- var decomp = try std.compress.lzma.decompress(self.allocator, limited_reader);
- defer decomp.deinit();
- var decomp_reader = decomp.reader();
- var writer = stream_or_file_out.writer();
- try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
- },
- else => {
- std.log.err("compression method {} not supported", .{lfh.compression_method});
- return error.CompressionMethodNotSupported;
- },
- }
+ var lfh = try LocalFileHeader.read(self.allocator, reader);
+ defer lfh.deinit(self.allocator);
+ return try lfh.extract(self.allocator, reader, writer);
}
fn pump(reader: anytype, writer: anytype, expected_size_written: usize, expected_crc32: u32) !void {
@@ -190,8 +169,8 @@ const CentralDirectoryHeader = struct {
extra_field: []const u8,
file_comment: []const u8,
- fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !CentralDirectoryHeader {
- return read2(allocator, stream_or_file, CentralDirectoryHeader, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !CentralDirectoryHeader {
+ return read2(allocator, reader, CentralDirectoryHeader, &[_]Dynamic{
.{ .field_name = "file_name", .length_field_name = "file_name_length" },
.{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
.{ .field_name = "file_comment", .length_field_name = "file_comment_length" },
@@ -216,8 +195,8 @@ const EndOfCentralDirectoryRecord = struct {
comment_length: u16,
comment: []const u8,
- fn read(allocator: std.mem.Allocator, file_or_stream: anytype) !EndOfCentralDirectoryRecord {
- return read2(allocator, file_or_stream, EndOfCentralDirectoryRecord, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !EndOfCentralDirectoryRecord {
+ return read2(allocator, reader, EndOfCentralDirectoryRecord, &[_]Dynamic{
.{ .field_name = "comment", .length_field_name = "comment_length" },
});
}
@@ -242,8 +221,8 @@ const LocalFileHeader = struct {
file_name: []const u8,
extra_field: []const u8,
- fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !LocalFileHeader {
- return read2(allocator, stream_or_file, LocalFileHeader, &[_]Dynamic{
+ fn read(allocator: std.mem.Allocator, reader: anytype) !LocalFileHeader {
+ return read2(allocator, reader, LocalFileHeader, &[_]Dynamic{
.{ .field_name = "file_name", .length_field_name = "file_name_length" },
.{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
});
@@ -253,6 +232,47 @@ const LocalFileHeader = struct {
allocator.free(self.file_name);
allocator.free(self.extra_field);
}
+
+ /// Reports whether this entry names a directory, i.e. whether `file_name` ends in "/".
+ fn is_dir(self: *LocalFileHeader) bool {
+     // This is what the java stdlib does
+     const name = self.file_name;
+     return name.len > 0 and name[name.len - 1] == '/';
+ }
+
+ /// Copies this entry's payload from `reader` to `writer`, decompressing it according to
+ /// `compression_method`, and hands the expected size and CRC-32 to `pump`.
+ /// Returns error.EncryptionNotSupported when bit 0 of the general purpose flags is set,
+ /// error.InvalidFileName for a directory entry with a non-zero compressed size, and
+ /// error.CompressionMethodNotSupported for any method other than store/deflate/lzma.
+ /// Directory entries with no data are a no-op.
+ fn extract(self: *LocalFileHeader, allocator: std.mem.Allocator, reader: anytype, writer: anytype) !void {
+     if (self.general_purpose_bit_flag.isSet(0)) return error.EncryptionNotSupported;
+
+     if (self.is_dir()) {
+         // directories can't have a size, this is very likely wrong.
+         if (self.compressed_size != 0) return error.InvalidFileName;
+         // Do nothing here. If we were definitely writing to the filesystem we could make an empty dir I guess.
+         return;
+     }
+
+     // Never read past the end of this entry's compressed bytes.
+     var bounded = std.io.limitedReader(reader, self.compressed_size);
+     const payload = bounded.reader();
+     switch (self.compression_method) {
+         .store => try pump(payload, writer, self.uncompressed_size, self.crc32),
+         .deflate => {
+             var inflater = try std.compress.deflate.decompressor(allocator, payload, null);
+             defer inflater.deinit();
+             try pump(inflater.reader(), writer, self.uncompressed_size, self.crc32);
+         },
+         .lzma => {
+             var unpacker = try std.compress.lzma.decompress(allocator, payload);
+             defer unpacker.deinit();
+             try pump(unpacker.reader(), writer, self.uncompressed_size, self.crc32);
+         },
+         else => {
+             std.log.err("compression method {} not supported", .{self.compression_method});
+             return error.CompressionMethodNotSupported;
+         },
+     }
+ }
};
const Dynamic = struct {
@@ -262,7 +282,7 @@ const Dynamic = struct {
fn read2(
allocator: std.mem.Allocator,
- stream_or_file: anytype,
+ reader: anytype,
comptime T: type,
comptime dynamics: []const Dynamic,
) !T {
@@ -271,7 +291,6 @@ fn read2(
if (ti != .Struct) @compileError("read2 expects type parameter T to be a struct, but it was a " ++ @typeName(T));
const si = ti.Struct;
- var reader = stream_or_file.reader();
const sig_actual = try reader.readIntLittle(u32);
if (sig_actual != T.SIG) {
std.log.err("invalid signature expected {x} got {x}", .{ T.SIG, sig_actual });
@@ -342,10 +361,11 @@ test "extract stored" {
defer zf.deinit();
var out = [_]u8{0} ** 1024;
var fbs_out = std.io.fixedBufferStream(&out);
- try zf.extract(0, &fbs, &fbs_out);
+ var writer = fbs_out.writer();
+ try zf.extract(0, &fbs, writer);
try std.testing.expectEqualStrings("Hello, Zip!", fbs_out.getWritten());
fbs_out.reset();
- try zf.extract(1, &fbs, &fbs_out);
+ try zf.extract(1, &fbs, writer);
try std.testing.expectEqualStrings("hi there\n", fbs_out.getWritten());
}
@@ -356,8 +376,53 @@ test "extract deflate" {
defer zf.deinit();
var out = [_]u8{0} ** 1024;
var fbs_out = std.io.fixedBufferStream(&out);
+ var writer = fbs_out.writer();
try std.testing.expectEqualStrings("Here is a comment :)", zf.file_comment(0));
try std.testing.expectEqual(@This().CompressionMethod.deflate, zf.central_directory_headers[0].compression_method);
- try zf.extract(0, &fbs, &fbs_out);
+ try zf.extract(0, &fbs, writer);
try std.testing.expectEqualStrings(@embedFile("foo.txt"), fbs_out.getWritten());
}
+test "subdir" {
+    // Smoke test: the embedded archive must parse; the entries themselves are only walked.
+    var stream = std.io.fixedBufferStream(@embedFile("subfolder.zip"));
+    var archive = try @This().from(std.testing.allocator, &stream);
+    defer archive.deinit();
+    const entry_count = archive.count_files();
+    for (0..entry_count) |entry_index| {
+        _ = entry_index;
+        // std.log.err("cdh {any}", .{archive.central_directory_headers[entry_index]});
+    }
+}
+/// Settings accepted by `pipeToFileSystem`.
+pub const Options = struct {
+ /// Allocator passed on to `LocalFileHeader.read` and `LocalFileHeader.extract`.
+ allocator: std.mem.Allocator,
+};
+// tar.zig compatibility, ish. It manages a forwards-only read of the filesystem anyway
+/// Reads local file headers from `reader` one after another and writes each non-directory
+/// entry to a file under `dir`, creating the entry's parent path first.
+/// NOTE(review): the loop condition is `true` and nothing in the body returns normally, so
+/// this only leaves by propagating an error -- presumably end of stream or a non-LFH
+/// signature once the local entries are exhausted; confirm the intended success path.
+/// NOTE(review): `lfh.file_name` is used as a path as-is; nothing here rejects names such
+/// as "../x" -- confirm whether callers are expected to trust the archive.
+pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
+ const allocator = options.allocator;
+ // var peek_stream = std.io.peekStream(4, reader);
+ // var peek_reader = peek_stream.reader();
+ while (true) {
+ var lfh = try LocalFileHeader.read(allocator, reader);
+ defer lfh.deinit(allocator);
+ // Make sure the containing directory exists before the file is created.
+ if (std.fs.path.dirname(lfh.file_name)) |dn| {
+ try dir.makePath(dn);
+ }
+ if (!lfh.is_dir()) {
+ var f = try dir.createFile(lfh.file_name, .{});
+ defer f.close();
+ var writer = f.writer();
+ try lfh.extract(allocator, reader, writer);
+ }
+ // NOTE(review): skipped unconditionally for every entry; 12 bytes presumably covers a
+ // data descriptor's crc32 + two sizes without its optional signature -- confirm that
+ // every entry in the expected input actually carries one.
+ try reader.skipBytes(12, .{}); // Data descriptor signature.
+ // _ = read_to_sig(peek_reader, LocalFileHeader.SIG) catch |e| switch (e) {
+ // error.EndOfStream => return,
+ // else => return e,
+ // };
+ // try peek_stream.putBack(&@as([4]u8, @bitCast(LocalFileHeader.SIG)));
+ }
+}
+
+test "filesystem" {
+    // Extracts src/subfolder.zip into ./test, both relative to the current working directory.
+    var f = try std.fs.cwd().openFile("src/subfolder.zip", .{});
+    defer f.close();
+    // Keep the output directory in a variable so its handle gets closed; passing the
+    // makeOpenPath result inline left it open.
+    var out_dir = try std.fs.cwd().makeOpenPath("test", .{});
+    defer out_dir.close();
+    try pipeToFileSystem(out_dir, f.reader(), .{ .allocator = std.testing.allocator });
+}
diff --git a/src/subfolder.zip b/src/subfolder.zip
new file mode 100644
index 0000000..0dc3a9e
--- /dev/null
+++ b/src/subfolder.zip
Binary files differ
diff --git a/test/foo/foo.txt b/test/foo/foo.txt
new file mode 100644
index 0000000..decec2d
--- /dev/null
+++ b/test/foo/foo.txt
@@ -0,0 +1 @@
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file