author | Martin Ashby <martin@ashbysoft.com> | 2023-09-15 23:36:00 +0100 |
---|---|---|
committer | Martin Ashby <martin@ashbysoft.com> | 2023-09-15 23:36:00 +0100 |
commit | bce4a48af18f355d60de81eca1ae591dbbc8aa74 (patch) | |
tree | 3e83381b2e5c401ff8f31060c4cdeecb8968f388 /src/main.zig | |
parent | 3fb260c67ed2812f24cadb154fa10cfc5d1a84f8 (diff) | |
download | zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.gz zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.bz2 zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.tar.xz zip-zig-bce4a48af18f355d60de81eca1ae591dbbc8aa74.zip |
initial work on linear extraction like TAR file support
Diffstat (limited to 'src/main.zig')
-rw-r--r-- | src/main.zig | 185 |
1 files changed, 125 insertions, 60 deletions
diff --git a/src/main.zig b/src/main.zig
index 1e006f6..cb116b9 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -66,20 +66,10 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
     const eocdr_seek_start: usize = epos - eocdr_search_width;
     try file_or_stream.seekTo(eocdr_seek_start);
     var reader = file_or_stream.reader();
-    const needle = @byteSwap(EndOfCentralDirectoryRecord.SIG);
-    var window: u32 = try reader.readIntLittle(u32);
-    while (true) {
-        if (window == needle) {
-            try file_or_stream.seekBy(-4);
-            break;
-        }
-        const nb = try reader.readByte();
-        window <<= 8;
-        window |= nb;
-    } else {
-        return error.EndOfCentralDirectoryRecordNotFound;
-    }
-    var eocdr = try EndOfCentralDirectoryRecord.read(allocator, file_or_stream);
+    const sb = try read_to_sig(reader, EndOfCentralDirectoryRecord.SIG);
+    try file_or_stream.seekBy(sb);
+
+    var eocdr = try EndOfCentralDirectoryRecord.read(allocator, reader);
     errdefer eocdr.deinit(allocator);
     if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0) return error.SpansNotSupported;
     if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk) return error.SpansNotSupported;
@@ -88,7 +78,7 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
     errdefer allocator.free(central_directory_headers);
     try file_or_stream.seekTo(eocdr.central_dir_offset);
     for (0..eocdr.total_central_dir_entries) |i| {
-        central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, file_or_stream);
+        central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, reader);
     }

     return .{
@@ -97,6 +87,23 @@ pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
         .central_directory_headers = central_directory_headers,
     };
 }
+
+/// returns how much to seekBy after the signature is found (becuase we'll now have read over it.)
+fn read_to_sig(reader: anytype, sig: u32) !i32 {
+    const needle = @byteSwap(sig);
+    var window: u32 = try reader.readIntLittle(u32);
+    while (true) {
+        if (window == needle) {
+            return -4;
+        }
+        const nb = try reader.readByte();
+        window <<= 8;
+        window |= nb;
+    } else {
+        return error.SignatureNotFound;
+    }
+}
+
 pub fn deinit(self: *@This()) void {
     self.end_of_central_directory_record.deinit(self.allocator);
     for (0..self.central_directory_headers.len) |i| {
@@ -108,47 +115,19 @@ pub fn deinit(self: *@This()) void {
 pub fn count_files(self: @This()) u16 {
     return self.end_of_central_directory_record.total_central_dir_entries;
 }
-pub fn file_name(self: @This(), index: u16) []const u8 {
+pub fn file_name(self: @This(), index: usize) []const u8 {
     return self.central_directory_headers[index].file_name;
 }
-pub fn file_comment(self: @This(), index: u16) []const u8 {
+pub fn file_comment(self: @This(), index: usize) []const u8 {
     return self.central_directory_headers[index].file_comment;
 }
-pub fn extract(self: @This(), index: u16, stream_or_file_in: anytype, stream_or_file_out: anytype) !void {
+pub fn extract(self: @This(), index: usize, stream_or_file_in: anytype, writer: anytype) !void {
     const cdh = self.central_directory_headers[index];
     try stream_or_file_in.seekTo(cdh.relative_offset_of_local_header);
-    var lfh = try LocalFileHeader.read(self.allocator, stream_or_file_in);
-    defer lfh.deinit(self.allocator);
-    const is_encrypted = lfh.general_purpose_bit_flag.isSet(0);
-    if (is_encrypted) return error.EncryptionNotSupported;
-    var reader = stream_or_file_in.reader();
-    var lr = std.io.limitedReader(reader, lfh.compressed_size);
-    var limited_reader = lr.reader();
-    switch (lfh.compression_method) {
-        .store => {
-            var writer = stream_or_file_out.writer();
-            try pump(limited_reader, writer, lfh.uncompressed_size, lfh.crc32);
-        },
-        .deflate => {
-            var decomp = try std.compress.deflate.decompressor(self.allocator, limited_reader, null);
-            defer decomp.deinit();
-            var decomp_reader = decomp.reader();
-            var writer = stream_or_file_out.writer();
-            try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
-        },
-        .lzma => {
-            var decomp = try std.compress.lzma.decompress(self.allocator, limited_reader);
-            defer decomp.deinit();
-            var decomp_reader = decomp.reader();
-            var writer = stream_or_file_out.writer();
-            try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
-        },
-        else => {
-            std.log.err("compression method {} not supported", .{lfh.compression_method});
-            return error.CompressionMethodNotSupported;
-        },
-    }
+    var lfh = try LocalFileHeader.read(self.allocator, reader);
+    defer lfh.deinit(self.allocator);
+    return try lfh.extract(self.allocator, reader, writer);
 }

 fn pump(reader: anytype, writer: anytype, expected_size_written: usize, expected_crc32: u32) !void {
@@ -190,8 +169,8 @@ const CentralDirectoryHeader = struct {
     extra_field: []const u8,
     file_comment: []const u8,

-    fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !CentralDirectoryHeader {
-        return read2(allocator, stream_or_file, CentralDirectoryHeader, &[_]Dynamic{
+    fn read(allocator: std.mem.Allocator, reader: anytype) !CentralDirectoryHeader {
+        return read2(allocator, reader, CentralDirectoryHeader, &[_]Dynamic{
             .{ .field_name = "file_name", .length_field_name = "file_name_length" },
             .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
             .{ .field_name = "file_comment", .length_field_name = "file_comment_length" },
@@ -216,8 +195,8 @@ const EndOfCentralDirectoryRecord = struct {
     comment_length: u16,
     comment: []const u8,

-    fn read(allocator: std.mem.Allocator, file_or_stream: anytype) !EndOfCentralDirectoryRecord {
-        return read2(allocator, file_or_stream, EndOfCentralDirectoryRecord, &[_]Dynamic{
+    fn read(allocator: std.mem.Allocator, reader: anytype) !EndOfCentralDirectoryRecord {
+        return read2(allocator, reader, EndOfCentralDirectoryRecord, &[_]Dynamic{
             .{ .field_name = "comment", .length_field_name = "comment_length" },
         });
     }
@@ -242,8 +221,8 @@ const LocalFileHeader = struct {
     file_name: []const u8,
     extra_field: []const u8,

-    fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !LocalFileHeader {
-        return read2(allocator, stream_or_file, LocalFileHeader, &[_]Dynamic{
+    fn read(allocator: std.mem.Allocator, reader: anytype) !LocalFileHeader {
+        return read2(allocator, reader, LocalFileHeader, &[_]Dynamic{
             .{ .field_name = "file_name", .length_field_name = "file_name_length" },
             .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
         });
@@ -253,6 +232,47 @@ const LocalFileHeader = struct {
         allocator.free(self.file_name);
         allocator.free(self.extra_field);
     }
+
+    fn is_dir(self: *LocalFileHeader) bool {
+        return std.mem.endsWith(u8, self.file_name, "/"); // This is what the java stdlib does
+    }
+
+    fn extract(self: *LocalFileHeader, allocator: std.mem.Allocator, reader: anytype, writer: anytype) !void {
+        const is_encrypted = self.general_purpose_bit_flag.isSet(0);
+        if (is_encrypted) return error.EncryptionNotSupported;
+
+        if (self.is_dir()) {
+            if (self.compressed_size != 0) {
+                // directories can't have a size, this is very likely wrong.
+                return error.InvalidFileName;
+            }
+            return; // Do nothing here. If we were definitely writing to the filesystem we could make an empty dir I guess.
+        }
+
+        var lr = std.io.limitedReader(reader, self.compressed_size);
+        var limited_reader = lr.reader();
+        switch (self.compression_method) {
+            .store => {
+                try pump(limited_reader, writer, self.uncompressed_size, self.crc32);
+            },
+            .deflate => {
+                var decomp = try std.compress.deflate.decompressor(allocator, limited_reader, null);
+                defer decomp.deinit();
+                var decomp_reader = decomp.reader();
+                try pump(decomp_reader, writer, self.uncompressed_size, self.crc32);
+            },
+            .lzma => {
+                var decomp = try std.compress.lzma.decompress(allocator, limited_reader);
+                defer decomp.deinit();
+                var decomp_reader = decomp.reader();
+                try pump(decomp_reader, writer, self.uncompressed_size, self.crc32);
+            },
+            else => {
+                std.log.err("compression method {} not supported", .{self.compression_method});
+                return error.CompressionMethodNotSupported;
+            },
+        }
+    }
 };

 const Dynamic = struct {
@@ -262,7 +282,7 @@

 fn read2(
     allocator: std.mem.Allocator,
-    stream_or_file: anytype,
+    reader: anytype,
     comptime T: type,
     comptime dynamics: []const Dynamic,
 ) !T {
@@ -271,7 +291,6 @@ fn read2(
     if (ti != .Struct) @compileError("read2 expects type parameter T to be a struct, but it was a " ++ @typeName(T));
     const si = ti.Struct;

-    var reader = stream_or_file.reader();
     const sig_actual = try reader.readIntLittle(u32);
     if (sig_actual != T.SIG) {
         std.log.err("invalid signature expected {x} got {x}", .{ T.SIG, sig_actual });
@@ -342,10 +361,11 @@ test "extract stored" {
     defer zf.deinit();
     var out = [_]u8{0} ** 1024;
     var fbs_out = std.io.fixedBufferStream(&out);
-    try zf.extract(0, &fbs, &fbs_out);
+    var writer = fbs_out.writer();
+    try zf.extract(0, &fbs, writer);
     try std.testing.expectEqualStrings("Hello, Zip!", fbs_out.getWritten());
     fbs_out.reset();
-    try zf.extract(1, &fbs, &fbs_out);
+    try zf.extract(1, &fbs, writer);
     try std.testing.expectEqualStrings("hi there\n", fbs_out.getWritten());
 }

@@ -356,8 +376,53 @@ test "extract deflate" {
     defer zf.deinit();
     var out = [_]u8{0} ** 1024;
     var fbs_out = std.io.fixedBufferStream(&out);
+    var writer = fbs_out.writer();
     try std.testing.expectEqualStrings("Here is a comment :)", zf.file_comment(0));
     try std.testing.expectEqual(@This().CompressionMethod.deflate, zf.central_directory_headers[0].compression_method);
-    try zf.extract(0, &fbs, &fbs_out);
+    try zf.extract(0, &fbs, writer);
     try std.testing.expectEqualStrings(@embedFile("foo.txt"), fbs_out.getWritten());
 }
+test "subdir" {
+    const test_zip = @embedFile("subfolder.zip");
+    var fbs = std.io.fixedBufferStream(test_zip);
+    var zf = try @This().from(std.testing.allocator, &fbs);
+    defer zf.deinit();
+    for (0..zf.count_files()) |i| {
+        _ = i;
+        // std.log.err("cdh {any}", .{zf.central_directory_headers[i]});
+    }
+}
+pub const Options = struct {
+    allocator: std.mem.Allocator,
+};
+// tar.zig compatibility, ish. It manages a forwards-only read of the filesystem anyway
+pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
+    const allocator = options.allocator;
+    // var peek_stream = std.io.peekStream(4, reader);
+    // var peek_reader = peek_stream.reader();
+    while (true) {
+        var lfh = try LocalFileHeader.read(allocator, reader);
+        defer lfh.deinit(allocator);
+        if (std.fs.path.dirname(lfh.file_name)) |dn| {
+            try dir.makePath(dn);
+        }
+        if (!lfh.is_dir()) {
+            var f = try dir.createFile(lfh.file_name, .{});
+            defer f.close();
+            var writer = f.writer();
+            try lfh.extract(allocator, reader, writer);
+        }
+        try reader.skipBytes(12, .{}); // Data descriptor signature.
+        // _ = read_to_sig(peek_reader, LocalFileHeader.SIG) catch |e| switch (e) {
+        //     error.EndOfStream => return,
+        //     else => return e,
+        // };
+        // try peek_stream.putBack(&@as([4]u8, @bitCast(LocalFileHeader.SIG)));
+    }
+}
+
+test "filesystem" {
+    var f = try std.fs.cwd().openFile("src/subfolder.zip", .{});
+    defer f.close();
+    try pipeToFileSystem(try std.fs.cwd().makeOpenPath("test", .{}), f.reader(), .{ .allocator = std.testing.allocator });
+}
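Usage note: the headline addition is `pipeToFileSystem`, a forwards-only extraction path named after tar.zig's `pipeToFileSystem` ("tar.zig compatibility, ish" per the comment), and the commit's own "filesystem" test shows the calling convention: a target `std.fs.Dir`, any reader, and an `Options` struct carrying the allocator. Below is a minimal hypothetical caller along the same lines; the `zip` module name, the archive path, and the output directory are assumptions, not part of the commit.

```zig
const std = @import("std");
// Hypothetical module name for src/main.zig; wire it up through build.zig in practice.
const zip = @import("zip");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Open the archive and an output directory, then stream local file entries
    // front-to-back straight onto the filesystem (tar-style linear read).
    var file = try std.fs.cwd().openFile("archive.zip", .{});
    defer file.close();
    var out_dir = try std.fs.cwd().makeOpenPath("out", .{});
    defer out_dir.close();

    try zip.pipeToFileSystem(out_dir, file.reader(), .{ .allocator = allocator });
}
```

One caveat visible in the diff itself: the `while (true)` loop in `pipeToFileSystem` has no clean exit yet (the commented-out `read_to_sig`/peek logic would provide one), so as committed the call is expected to end by surfacing a read error once it runs past the last local file entry.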
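The random-access API changed shape as well: `extract` now takes an index (widened from `u16` to `usize`), the input stream, and any writer rather than a seekable output stream, and the updated tests pass `fbs_out.writer()` directly. A small hypothetical helper in that style; the `ZipFile` import name and the ArrayList-backed output buffer are assumptions:

```zig
const std = @import("std");
// Hypothetical module name for src/main.zig.
const ZipFile = @import("zip");

/// Reads the first entry of an in-memory archive into a caller-owned slice.
fn readFirstEntry(allocator: std.mem.Allocator, zip_bytes: []const u8) ![]u8 {
    var fbs = std.io.fixedBufferStream(zip_bytes);
    var zf = try ZipFile.from(allocator, &fbs);
    defer zf.deinit();

    var buf = std.ArrayList(u8).init(allocator);
    errdefer buf.deinit();

    // extract() now writes the decompressed bytes to any writer; here an ArrayList.
    try zf.extract(0, &fbs, buf.writer());
    std.log.debug("extracted {s} ({d} bytes)", .{ zf.file_name(0), buf.items.len });
    return buf.toOwnedSlice();
}
```

Taking a plain writer, rather than something that has to support both `writer()` and seeking, is what lets the same `LocalFileHeader.extract` serve both the seek-based `extract` path and the streaming `pipeToFileSystem` path.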