const std = @import("std");

// ZIP file implementation
// See spec.txt.
//
// This file is itself a struct (see the fields below): an opened archive is
// represented by its end-of-central-directory record plus the list of central
// directory headers. File data is only touched by `extract`.

/// Compression method ids as stored in local and central headers.
const CompressionMethod = enum(u16) {
    store = 0,
    shrink = 1,
    reduce_1 = 2,
    reduce_2 = 3,
    reduce_3 = 4,
    reduce_4 = 5,
    implode = 6,
    reserved_1 = 7,
    deflate = 8,
    deflate64 = 9,
    pkware_implode = 10,
    reserved_2 = 11,
    bzip2 = 12,
    reserved_3 = 13,
    lzma = 14,
    reserved_4 = 15,
    ibm_zos_zmpsc = 16,
    reserved_5 = 17,
    ibm_terse = 18,
    ibm_lz77_z = 19,
    zstd_deprecated = 20,
    zstd = 93,
    mp3 = 94,
    xz = 95,
    jpeg = 96,
    wavpack = 97,
    ppmd_version_i_rev1 = 98,
    aex_encryption_marker = 99,
    // Non-exhaustive: the value comes straight from untrusted archive bytes,
    // so ids we do not know about must still be representable. `extract`
    // rejects them with error.CompressionMethodNotSupported.
    _,
};

// Overall layout of a zip file:
//
// [local file header 1]
// [encryption header 1]
// [file data 1]
// [data descriptor 1]
// .
// .
// .
// [local file header n]
// [encryption header n]
// [file data n]
// [data descriptor n]
// [archive decryption header]
// [archive extra data record]
// [central directory header 1]
// .
// .
// .
// [central directory header n]
// [zip64 end of central directory record]
// [zip64 end of central directory locator]
// [end of central directory record]

/// Allocator that owns every slice inside the records below.
allocator: std.mem.Allocator,
end_of_central_directory_record: EndOfCentralDirectoryRecord,
central_directory_headers: std.ArrayList(CentralDirectoryHeader),

/// Create an archive description with no entries. Release with `deinit`.
pub fn empty(allocator: std.mem.Allocator) @This() {
    return .{
        .allocator = allocator,
        .end_of_central_directory_record = EndOfCentralDirectoryRecord.empty(),
        .central_directory_headers = std.ArrayList(CentralDirectoryHeader).init(allocator),
    };
}

/// Read the central directory of an archive.
///
/// `file_or_stream` must provide `getEndPos`, `seekTo`, `seekBy` and `reader`
/// (a `*std.fs.File` or a pointer to a fixed buffer stream both work).
/// Multi-disk ("spanned") archives are rejected with error.SpansNotSupported.
/// The caller owns the result and must call `deinit`.
pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
    // Find the EndOfCentralDirectoryRecord. It is the last record in the
    // file: 22 fixed bytes followed by a comment of at most 65535 bytes.
    // NOTE: the scan runs forwards, so it stops at the first occurrence of
    // the signature inside that window, which is not necessarily the last.
    const eocdr_search_width_max: u64 = 22 + 65535;
    const epos = try file_or_stream.getEndPos();
    const eocdr_search_width = @min(epos, eocdr_search_width_max);
    try file_or_stream.seekTo(epos - eocdr_search_width);

    const reader = file_or_stream.reader();
    const seek_back = try read_to_sig(reader, EndOfCentralDirectoryRecord.SIG);
    try file_or_stream.seekBy(seek_back);

    var eocdr = try EndOfCentralDirectoryRecord.read(allocator, reader);
    errdefer eocdr.deinit(allocator);

    if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0)
        return error.SpansNotSupported;
    if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk)
        return error.SpansNotSupported;

    try file_or_stream.seekTo(eocdr.central_dir_offset);

    var central_directory_headers = try std.ArrayList(CentralDirectoryHeader).initCapacity(
        allocator,
        eocdr.total_central_dir_entries,
    );
    errdefer {
        for (central_directory_headers.items) |*cdh| cdh.deinit(allocator);
        central_directory_headers.deinit();
    }

    for (0..eocdr.total_central_dir_entries) |_| {
        central_directory_headers.appendAssumeCapacity(try CentralDirectoryHeader.read(allocator, reader));
    }

    return .{
        .allocator = allocator,
        .end_of_central_directory_record = eocdr,
        .central_directory_headers = central_directory_headers,
    };
}

/// Free everything owned by this archive description.
pub fn deinit(self: *@This()) void {
    self.end_of_central_directory_record.deinit(self.allocator);
    for (self.central_directory_headers.items) |*cdh| cdh.deinit(self.allocator);
    self.central_directory_headers.deinit();
}

/// Scan `reader` forwards until the 4 byte signature `sig` has been read.
///
/// Returns how much to seekBy after the signature is found (because we'll
/// now have read past it), which is always -4. Running out of input yields
/// error.SignatureNotFound.
fn read_to_sig(reader: anytype, sig: u32) !i32 {
    // `window` holds the last four bytes in stream order (oldest byte in the
    // most significant position). Signatures are stored little-endian on
    // disk, so the value to look for is the byte-swapped signature, and the
    // initial window has to be read big-endian to use the same byte order.
    const needle = @byteSwap(sig);
    var window: u32 = reader.readIntBig(u32) catch |e|
        return if (e == error.EndOfStream) error.SignatureNotFound else e;
    while (window != needle) {
        const nb = reader.readByte() catch |e|
            return if (e == error.EndOfStream) error.SignatureNotFound else e;
        window = (window << 8) | nb;
    }
    return -4;
}

/// Number of entries recorded in the end-of-central-directory record.
pub fn count_files(self: @This()) u16 {
    return self.end_of_central_directory_record.total_central_dir_entries;
}

/// Name of entry `index`. The slice is owned by the archive description.
pub fn file_name(self: @This(), index: usize) []const u8 {
    return self.central_directory_headers.items[index].file_name;
}

/// Comment of entry `index`. The slice is owned by the archive description.
pub fn file_comment(self: @This(), index: usize) []const u8 {
    return self.central_directory_headers.items[index].file_comment;
}

/// True when entry `index` names a directory (its name ends in '/').
pub fn is_dir(self: *const @This(), index: usize) bool {
    return std.mem.endsWith(u8, self.central_directory_headers.items[index].file_name, "/"); // This is what the java stdlib does
}

/// Decompress entry `index` from `stream_or_file_in` into `writer`.
///
/// `stream_or_file_in` must be the same archive that was handed to `from`.
/// Size and CRC-32 of the output are verified.
pub fn extract(self: @This(), index: usize, stream_or_file_in: anytype, writer: anytype) !void {
    const cdh = self.central_directory_headers.items[index];
    try stream_or_file_in.seekTo(cdh.relative_offset_of_local_header);
    const reader = stream_or_file_in.reader();

    var lfh = try LocalFileHeader.read(self.allocator, reader);
    defer lfh.deinit(self.allocator);

    // With flag bit 3 set the local header carries zeros for CRC and sizes
    // (the real values sit in a data descriptor behind the data). The
    // central directory always has the real values, so use those instead.
    if (lfh.general_purpose_bit_flag.isSet(LocalFileHeader.flag_data_descriptor)) {
        lfh.crc32 = cdh.crc32;
        lfh.compressed_size = cdh.compressed_size;
        lfh.uncompressed_size = cdh.uncompressed_size;
        lfh.general_purpose_bit_flag.unset(LocalFileHeader.flag_data_descriptor);
    }

    return lfh.extract(self.allocator, reader, writer);
}

/// Byte count and checksum produced by `pump_returning`.
const PumpResult = struct { written: usize, crc32: u32 };

/// Copy `reader` to `writer` until end of stream, then check that exactly
/// `expected_size_written` bytes with CRC-32 `expected_crc32` were copied.
fn pump(reader: anytype, writer: anytype, expected_size_written: usize, expected_crc32: u32) !void {
    const result = try pump_returning(reader, writer);
    if (result.written != expected_size_written) return error.WrongUncompressedSize;
    if (result.crc32 != expected_crc32) return error.WrongChecksum;
}

/// Copy `reader` to `writer` until end of stream and report how many bytes
/// were copied together with their CRC-32.
fn pump_returning(reader: anytype, writer: anytype) !PumpResult {
    var buf: [1024]u8 = undefined;
    var crc32 = std.hash.Crc32.init();
    var written: usize = 0;
    while (true) {
        const n = try reader.read(&buf);
        if (n == 0) break;
        const chunk = buf[0..n];
        try writer.writeAll(chunk);
        crc32.update(chunk);
        written += n;
    }
    return .{ .written = written, .crc32 = crc32.final() };
}

/// Length of a variable sized header field, checked against its u16 slot.
fn len_u16(bytes: []const u8) !u16 {
    return std.math.cast(u16, bytes.len) orelse error.FieldTooLong;
}

/// One entry of the central directory. Field order matches the on-disk
/// order because `read2` reads fields in declaration order.
const CentralDirectoryHeader = struct {
    const SIG: u32 = @as(u32, 0x02014b50);

    version_made_by: u16,
    version_needed_to_extract: u16,
    general_purpose_bit_flag: u16,
    compression_method: CompressionMethod,
    last_mod_file_time: u16,
    last_mod_file_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_comment_length: u16,
    disk_number_start: u16,
    internal_file_attributes: u16,
    external_file_attributes: u32,
    relative_offset_of_local_header: u32,
    file_name: []const u8,
    extra_field: []const u8,
    file_comment: []const u8,

    /// Read one header (including its signature). The slices are allocated
    /// with `allocator`; release them with `deinit`.
    fn read(allocator: std.mem.Allocator, reader: anytype) !CentralDirectoryHeader {
        return read2(allocator, reader, CentralDirectoryHeader, &[_]Dynamic{
            .{ .field_name = "file_name", .length_field_name = "file_name_length" },
            .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
            .{ .field_name = "file_comment", .length_field_name = "file_comment_length" },
        });
    }

    /// Serialize the header. The three length fields are refreshed from the
    /// slices first, which is why `self` is mutable.
    fn write(self: *CentralDirectoryHeader, writer: anytype) !void {
        // TODO generics
        self.file_name_length = try len_u16(self.file_name);
        self.extra_field_length = try len_u16(self.extra_field);
        self.file_comment_length = try len_u16(self.file_comment);

        try writer.writeIntLittle(u32, SIG);
        try writer.writeIntLittle(u16, self.version_made_by);
        try writer.writeIntLittle(u16, self.version_needed_to_extract);
        try writer.writeIntLittle(u16, self.general_purpose_bit_flag);
        try writer.writeIntLittle(u16, @intFromEnum(self.compression_method));
        try writer.writeIntLittle(u16, self.last_mod_file_time);
        try writer.writeIntLittle(u16, self.last_mod_file_date);
        try writer.writeIntLittle(u32, self.crc32);
        try writer.writeIntLittle(u32, self.compressed_size);
        try writer.writeIntLittle(u32, self.uncompressed_size);
        try writer.writeIntLittle(u16, self.file_name_length);
        try writer.writeIntLittle(u16, self.extra_field_length);
        try writer.writeIntLittle(u16, self.file_comment_length);
        try writer.writeIntLittle(u16, self.disk_number_start);
        try writer.writeIntLittle(u16, self.internal_file_attributes);
        try writer.writeIntLittle(u32, self.external_file_attributes);
        try writer.writeIntLittle(u32, self.relative_offset_of_local_header);
        try writer.writeAll(self.file_name);
        try writer.writeAll(self.extra_field);
        try writer.writeAll(self.file_comment);
    }

    /// Build the central directory entry for a local header located at
    /// `offset`. Name and extra field are duplicated, so the result is
    /// independent of `lfh` and must be released with `deinit`.
    fn from(allocator: std.mem.Allocator, lfh: LocalFileHeader, offset: u32) !CentralDirectoryHeader {
        // TODO generics
        const name_copy = try allocator.dupe(u8, lfh.file_name);
        errdefer allocator.free(name_copy);
        const extra_copy = try allocator.dupe(u8, lfh.extra_field);

        return .{
            // Upper byte 0 = MS-DOS/FAT host, lower byte = spec version.
            .version_made_by = lfh.version_needed_to_extract,
            .version_needed_to_extract = lfh.version_needed_to_extract,
            .general_purpose_bit_flag = lfh.general_purpose_bit_flag.mask,
            .compression_method = lfh.compression_method,
            .last_mod_file_time = lfh.last_mod_file_time,
            .last_mod_file_date = lfh.last_mod_file_date,
            .crc32 = lfh.crc32,
            .compressed_size = lfh.compressed_size,
            .uncompressed_size = lfh.uncompressed_size,
            .file_name_length = lfh.file_name_length,
            .extra_field_length = lfh.extra_field_length,
            .file_comment_length = 0,
            .disk_number_start = 0,
            .internal_file_attributes = 0,
            .external_file_attributes = 0,
            .relative_offset_of_local_header = offset,
            .file_name = name_copy,
            .extra_field = extra_copy,
            .file_comment = &[_]u8{},
        };
    }

    /// Free the three owned slices (freeing an empty slice is a no-op).
    fn deinit(self: *CentralDirectoryHeader, allocator: std.mem.Allocator) void {
        allocator.free(self.file_name);
        allocator.free(self.extra_field);
        allocator.free(self.file_comment);
    }
};

/// The record that terminates every archive and points at the central
/// directory. Field order matches the on-disk order (see `read2`).
const EndOfCentralDirectoryRecord = struct {
    const SIG: u32 = @as(u32, 0x06054b50);

    disk_number_this: u16,
    disk_number_central_dir_start: u16,
    total_central_dir_entries_on_this_disk: u16,
    total_central_dir_entries: u16,
    size_of_central_dir: u32,
    central_dir_offset: u32,
    comment_length: u16,
    comment: []const u8,

    /// Read the record (including its signature); `comment` is allocated.
    fn read(allocator: std.mem.Allocator, reader: anytype) !EndOfCentralDirectoryRecord {
        return read2(allocator, reader, EndOfCentralDirectoryRecord, &[_]Dynamic{
            .{ .field_name = "comment", .length_field_name = "comment_length" },
        });
    }

    /// A record describing an archive without entries or comment.
    fn empty() EndOfCentralDirectoryRecord {
        return .{
            .disk_number_this = 0,
            .disk_number_central_dir_start = 0,
            .total_central_dir_entries_on_this_disk = 0,
            .total_central_dir_entries = 0,
            .size_of_central_dir = 0,
            .central_dir_offset = 0,
            .comment_length = 0,
            .comment = &[_]u8{},
        };
    }

    /// Serialize the record; `comment_length` is refreshed from `comment`.
    fn write(self: *EndOfCentralDirectoryRecord, writer: anytype) !void {
        self.comment_length = try len_u16(self.comment);

        try writer.writeIntLittle(u32, SIG);
        try writer.writeIntLittle(u16, self.disk_number_this);
        try writer.writeIntLittle(u16, self.disk_number_central_dir_start);
        try writer.writeIntLittle(u16, self.total_central_dir_entries_on_this_disk);
        try writer.writeIntLittle(u16, self.total_central_dir_entries);
        try writer.writeIntLittle(u32, self.size_of_central_dir);
        try writer.writeIntLittle(u32, self.central_dir_offset);
        try writer.writeIntLittle(u16, self.comment_length);
        try writer.writeAll(self.comment);
    }

    /// Free the comment (freeing an empty slice is a no-op).
    fn deinit(self: *EndOfCentralDirectoryRecord, allocator: std.mem.Allocator) void {
        allocator.free(self.comment);
    }
};

/// The header that precedes each entry's data. Field order matches the
/// on-disk order (see `read2`).
const LocalFileHeader = struct {
    const SIG: u32 = @as(u32, 0x04034b50);

    /// General purpose flag bit 0: the entry is encrypted.
    const flag_encrypted = 0;
    /// General purpose flag bit 3: CRC and sizes follow the data in a data
    /// descriptor and are zero in this header.
    const flag_data_descriptor = 3;

    version_needed_to_extract: u16,
    general_purpose_bit_flag: std.bit_set.IntegerBitSet(16),
    compression_method: CompressionMethod,
    last_mod_file_time: u16,
    last_mod_file_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_name: []const u8,
    extra_field: []const u8,

    /// Read one header (including its signature). The slices are allocated
    /// with `allocator`; release them with `deinit`.
    fn read(allocator: std.mem.Allocator, reader: anytype) !LocalFileHeader {
        return read2(allocator, reader, LocalFileHeader, &[_]Dynamic{
            .{ .field_name = "file_name", .length_field_name = "file_name_length" },
            .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
        });
    }

    /// Build the header for storing the file at `path` (relative to the
    /// current directory) with deflate compression.
    ///
    /// The file is compressed once into a null writer purely to learn the
    /// CRC-32 and both sizes; no compressed data is kept. The entry name is
    /// a copy of `path`. Release the result with `deinit`.
    fn from(allocator: std.mem.Allocator, path: []const u8) !LocalFileHeader {
        const file = try std.fs.cwd().openFile(path, .{});
        defer file.close();

        // The compressor must write through `cw.writer()` (which points at
        // `cw`); handing it `cw` by value would count into a private copy.
        var cw = std.io.countingWriter(std.io.null_writer);
        var comp = try std.compress.deflate.compressor(allocator, cw.writer(), .{});
        defer comp.deinit();
        const result = try pump_returning(file.reader(), comp.writer());
        try comp.close(); // flush the final block so the count is complete

        // TODO zip64 support
        const compressed_size = std.math.cast(u32, cw.bytes_written) orelse return error.FileTooLarge;
        const uncompressed_size = std.math.cast(u32, result.written) orelse return error.FileTooLarge;
        const name_length = try len_u16(path);

        return .{
            .version_needed_to_extract = 20, // 2.0 is the minimum for deflate
            .general_purpose_bit_flag = std.bit_set.IntegerBitSet(16).initEmpty(),
            .compression_method = .deflate,
            .last_mod_file_time = 0, // TODO
            .last_mod_file_date = 0, // TODO
            .crc32 = result.crc32,
            .compressed_size = compressed_size,
            .uncompressed_size = uncompressed_size,
            .file_name_length = name_length,
            .extra_field_length = 0,
            .file_name = try allocator.dupe(u8, path),
            .extra_field = &[_]u8{},
        };
    }

    /// Serialize the header. Both length fields are refreshed from the
    /// slices first, which is why `self` is mutable.
    fn write(self: *LocalFileHeader, writer: anytype) !void {
        self.file_name_length = try len_u16(self.file_name);
        self.extra_field_length = try len_u16(self.extra_field);

        try writer.writeIntLittle(u32, SIG);
        try writer.writeIntLittle(u16, self.version_needed_to_extract);
        try writer.writeIntLittle(u16, self.general_purpose_bit_flag.mask);
        try writer.writeIntLittle(u16, @intFromEnum(self.compression_method));
        try writer.writeIntLittle(u16, self.last_mod_file_time);
        try writer.writeIntLittle(u16, self.last_mod_file_date);
        try writer.writeIntLittle(u32, self.crc32);
        try writer.writeIntLittle(u32, self.compressed_size);
        try writer.writeIntLittle(u32, self.uncompressed_size);
        try writer.writeIntLittle(u16, self.file_name_length);
        try writer.writeIntLittle(u16, self.extra_field_length);
        try writer.writeAll(self.file_name);
        try writer.writeAll(self.extra_field);
    }

    /// Free the owned slices (freeing an empty slice is a no-op).
    fn deinit(self: *LocalFileHeader, allocator: std.mem.Allocator) void {
        allocator.free(self.file_name);
        allocator.free(self.extra_field);
    }

    /// True when the entry names a directory (its name ends in '/').
    fn is_dir(self: *LocalFileHeader) bool {
        return std.mem.endsWith(u8, self.file_name, "/"); // This is what the java stdlib does
    }

    /// Decompress the entry's data, which must start at the current
    /// position of `reader`, into `writer`, verifying size and CRC-32.
    /// Exactly `compressed_size` bytes are consumed at most.
    fn extract(self: *LocalFileHeader, allocator: std.mem.Allocator, reader: anytype, writer: anytype) !void {
        if (self.general_purpose_bit_flag.isSet(flag_encrypted)) return error.EncryptionNotSupported;

        if (self.is_dir()) {
            if (self.compressed_size != 0) {
                // directories can't have a size, this is very likely wrong.
                return error.InvalidFileName;
            }
            return; // Do nothing here. If we were definitely writing to a filesystem we could make an empty dir I guess.
        }

        // Sizes and CRC in this header are zero when a data descriptor is
        // used. Carrying on would "successfully" extract an empty file, so
        // fail loudly unless the caller substituted the real values.
        if (self.general_purpose_bit_flag.isSet(flag_data_descriptor)) return error.DataDescriptorNotSupported;

        var lr = std.io.limitedReader(reader, self.compressed_size);
        const limited_reader = lr.reader();

        switch (self.compression_method) {
            .store => {
                try pump(limited_reader, writer, self.uncompressed_size, self.crc32);
            },
            .deflate => {
                var decomp = try std.compress.deflate.decompressor(allocator, limited_reader, null);
                defer decomp.deinit();
                try pump(decomp.reader(), writer, self.uncompressed_size, self.crc32);
            },
            .lzma => {
                // NOTE(review): zip prefixes LZMA data with its own small
                // header; confirm std.compress.lzma accepts it as-is.
                var decomp = try std.compress.lzma.decompress(allocator, limited_reader);
                defer decomp.deinit();
                try pump(decomp.reader(), writer, self.uncompressed_size, self.crc32);
            },
            else => {
                std.log.err("compression method {} not supported", .{self.compression_method});
                return error.CompressionMethodNotSupported;
            },
        }
    }
};

/// Describes a variable length slice field of a record and the integer
/// field (declared earlier in the struct) that holds its length.
const Dynamic = struct {
    field_name: []const u8,
    length_field_name: []const u8,
};

/// Generic record reader.
///
/// Checks the signature `T.SIG`, then fills the fields of `T` in
/// declaration order: integers, integer-backed packed structs and integer
/// enums are read little-endian; fields listed in `dynamics` are allocated
/// with `allocator` and filled with as many bytes as their length field
/// says. On any error nothing stays allocated. Errors include
/// error.InvalidSignature, error.EndOfStream (truncated record) and
/// error.InvalidEnumTag (value not valid for an exhaustive enum).
fn read2(
    allocator: std.mem.Allocator,
    reader: anytype,
    comptime T: type,
    comptime dynamics: []const Dynamic,
) !T {
    if (!@hasDecl(T, "SIG")) @compileError("Expected decl SIG:u32 on type " ++ @typeName(T));
    const ti = @typeInfo(T);
    if (ti != .Struct) @compileError("read2 expects type parameter T to be a struct, but it was a " ++ @typeName(T));
    const si = ti.Struct;

    const sig_actual = try reader.readIntLittle(u32);
    if (sig_actual != T.SIG) {
        std.log.err("invalid signature expected {x} got {x}", .{ T.SIG, sig_actual });
        return error.InvalidSignature;
    }

    var t: T = undefined;

    // An errdefer inside the inline loop below would end with each
    // iteration. Instead start every dynamic slice out empty and free them
    // all from function scope; freeing an empty slice is a no-op.
    inline for (dynamics) |dyn| {
        @field(t, dyn.field_name) = &.{};
    }
    errdefer {
        inline for (dynamics) |dyn| allocator.free(@field(t, dyn.field_name));
    }

    inline for (si.fields) |field| {
        const fti = @typeInfo(field.type);
        dynamic: inline for (dynamics) |dyn| {
            if (comptime std.mem.eql(u8, dyn.field_name, field.name)) {
                if (fti != .Pointer) @compileError("field " ++ field.name ++ " is marked dynamic but isn't a pointer. Instead it's a " ++ @typeName(field.type));
                const pi = fti.Pointer;
                if (pi.size != .Slice) @compileError("field " ++ field.name ++ " is marked dynamic, but isn't a slice, instead it's sized " ++ @tagName(pi.size));
                const len = @field(t, dyn.length_field_name);
                const buf = try allocator.alloc(pi.child, len);
                // Store before reading so the errdefer above covers it.
                @field(t, field.name) = buf;
                // A short read means the record is truncated.
                try reader.readNoEof(buf);
                break :dynamic;
            }
        } else {
            switch (fti) {
                .Int => {
                    @field(t, field.name) = try reader.readIntLittle(field.type);
                },
                .Struct => |fsi| {
                    if (fsi.backing_integer) |bi| {
                        const int = try reader.readIntLittle(bi);
                        @field(t, field.name) = @bitCast(int);
                    } else @compileError("only packed struct with backing integer are supported, field " ++ field.name ++ " type " ++ @typeName(field.type) ++ " is not such a struct");
                },
                .Enum => |fei| {
                    if (@typeInfo(fei.tag_type) == .Int) {
                        const int = try reader.readIntLittle(fei.tag_type);
                        // Checked conversion: the value is untrusted input.
                        @field(t, field.name) = try std.meta.intToEnum(field.type, int);
                    } else @compileError("only enum with integer tag type are supported field " ++ field.name ++ " type " ++ @typeName(field.type) ++ " is not such an enum");
                },
                else => @compileError("don't know how to handle field " ++ field.name ++ " of type " ++ @tagName(fti)),
            }
        }
    }
    return t;
}

pub const Options = struct {
    allocator: std.mem.Allocator,
};

/// Whether an entry name from an archive may be created below the target
/// directory: it must be non-empty, relative, and free of ".." components.
fn is_safe_entry_name(name: []const u8) bool {
    if (name.len == 0) return false;
    if (name[0] == '/' or name[0] == '\\') return false;
    if (std.fs.path.isAbsolute(name)) return false;
    var components = std.mem.tokenizeAny(u8, name, "/\\");
    while (components.next()) |component| {
        if (std.mem.eql(u8, component, "..")) return false;
    }
    return true;
}

/// tar.zig compatibility, ish. It does a forwards only pass of a zipfile,
/// creating every entry below `dir`.
///
/// The pass stops at the first record that is not a local file header
/// (normally the start of the central directory). Entry names that would
/// escape `dir` are rejected with error.UnsafePath. Entries that rely on a
/// data descriptor cannot be streamed and fail with
/// error.DataDescriptorNotSupported.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
    const allocator = options.allocator;
    var i: u32 = 0;
    while (true) : (i += 1) {
        std.log.debug("reading file {}", .{i});
        var lfh = LocalFileHeader.read(allocator, reader) catch |e| switch (e) {
            error.InvalidSignature => return, // done
            else => return e,
        };
        defer lfh.deinit(allocator);

        // Names come from the archive and must not be trusted.
        if (!is_safe_entry_name(lfh.file_name)) return error.UnsafePath;

        if (lfh.is_dir()) {
            // Validates that the directory entry carries no data.
            try lfh.extract(allocator, reader, std.io.null_writer);
            try dir.makePath(std.mem.trimRight(u8, lfh.file_name, "/"));
            continue;
        }

        if (std.fs.path.dirname(lfh.file_name)) |dn| {
            try dir.makePath(dn);
        }
        var f = try dir.createFile(lfh.file_name, .{});
        defer f.close();
        try lfh.extract(allocator, reader, f.writer());
        // TODO skip data descriptor
    }
}

pub const WriteOptions = struct {};

/// Write a whole zip file.
///
/// `path_iterator` must have a `next()` returning `?[]const u8`; pass a
/// pointer when `next` needs to mutate the iterator. Every path is opened
/// relative to the current directory, deflated, and stored under that same
/// path. Each file is read twice: once to compute its header, once to emit
/// its data.
pub fn write_zip(allocator: std.mem.Allocator, path_iterator: anytype, writer: anytype) !void {
    var cw = std.io.countingWriter(writer);
    const out = cw.writer();

    var zf = @This().empty(allocator);
    defer zf.deinit();

    while (path_iterator.next()) |path| {
        const file = try std.fs.cwd().openFile(path, .{});
        defer file.close();

        var lfh = try LocalFileHeader.from(allocator, path);
        defer lfh.deinit(allocator);

        const header_offset = std.math.cast(u32, cw.bytes_written) orelse return error.ArchiveTooLarge;
        {
            var cdh = try CentralDirectoryHeader.from(allocator, lfh, header_offset);
            errdefer cdh.deinit(allocator);
            try zf.central_directory_headers.append(cdh); // zf owns it now
        }

        try lfh.write(out);

        // The header announces deflate, so the data has to be deflated too.
        const data_start = cw.bytes_written;
        var comp = try std.compress.deflate.compressor(allocator, out, .{});
        defer comp.deinit();
        _ = try pump_returning(file.reader(), comp.writer());
        try comp.close();

        // The header was computed in an earlier pass over the file.
        if (cw.bytes_written - data_start != lfh.compressed_size) return error.FileChanged;
    }

    const central_dir_offset = std.math.cast(u32, cw.bytes_written) orelse return error.ArchiveTooLarge;
    for (zf.central_directory_headers.items) |*cdh| {
        try cdh.write(out);
    }
    const size_of_central_dir = std.math.cast(u32, cw.bytes_written - central_dir_offset) orelse
        return error.ArchiveTooLarge;
    const entry_count = std.math.cast(u16, zf.central_directory_headers.items.len) orelse
        return error.TooManyFiles;

    var eocdr = EndOfCentralDirectoryRecord{
        .disk_number_this = 0,
        .disk_number_central_dir_start = 0,
        .total_central_dir_entries_on_this_disk = entry_count,
        .total_central_dir_entries = entry_count,
        .size_of_central_dir = size_of_central_dir,
        .central_dir_offset = central_dir_offset,
        .comment_length = 0,
        .comment = &[_]u8{},
    };
    try eocdr.write(out);
}

/// Minimal path iterator used by the "write" test.
const TestFileIter = struct {
    files: [2][]const u8,
    ix: usize,

    /// Next path, or null when exhausted. Advances the iterator, hence the
    /// pointer receiver.
    fn next(self: *TestFileIter) ?[]const u8 {
        if (self.ix >= self.files.len) return null;
        defer self.ix += 1;
        return self.files[self.ix];
    }
};

test "write" {
    const allocator = std.testing.allocator;
    const out = try std.fs.cwd().createFile("test_out.zip", .{});
    defer out.close();
    var tfi = TestFileIter{
        .ix = 0,
        .files = [_][]const u8{ "src/foo.txt", "src/bar.txt" },
    };
    try write_zip(allocator, &tfi, out.writer());
}

test "open stream" {
    const test_zip = @embedFile("hello.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();
    try std.testing.expectEqual(@as(u16, 2), zf.count_files());
    try std.testing.expectEqualStrings("hello.txt", zf.file_name(0));
    try std.testing.expectEqualStrings("foo.txt", zf.file_name(1));
}

test "open file" {
    const test_zip = try std.fs.cwd().openFile("src/hello.zip", .{});
    defer test_zip.close();
    var zf = try @This().from(std.testing.allocator, &test_zip);
    defer zf.deinit();
    try std.testing.expectEqual(@as(u16, 2), zf.count_files());
    try std.testing.expectEqualStrings("hello.txt", zf.file_name(0));
    try std.testing.expectEqualStrings("foo.txt", zf.file_name(1));
}

test "extract stored" {
    const test_zip = @embedFile("hello.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();
    var out = [_]u8{0} ** 1024;
    var fbs_out = std.io.fixedBufferStream(&out);
    const writer = fbs_out.writer();
    try zf.extract(0, &fbs, writer);
    try std.testing.expectEqualStrings("Hello, Zip!", fbs_out.getWritten());
    fbs_out.reset();
    try zf.extract(1, &fbs, writer);
    try std.testing.expectEqualStrings("hi there\n", fbs_out.getWritten());
}

test "extract deflate" {
    const test_zip = @embedFile("deflate.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();
    var out = [_]u8{0} ** 1024;
    var fbs_out = std.io.fixedBufferStream(&out);
    const writer = fbs_out.writer();
    try std.testing.expectEqualStrings("Here is a comment :)", zf.file_comment(0));
    try std.testing.expectEqual(@This().CompressionMethod.deflate, zf.central_directory_headers.items[0].compression_method);
    try zf.extract(0, &fbs, writer);
    try std.testing.expectEqualStrings(@embedFile("foo.txt"), fbs_out.getWritten());
}

test "subdir" {
    const test_zip = @embedFile("subfolder.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();
    try std.testing.expectEqual(true, zf.is_dir(0));
    try std.testing.expectEqual(false, zf.is_dir(1));
}

test "pipe to filesystem" {
    var f = try std.fs.cwd().openFile("src/subfolder.zip", .{});
    defer f.close();
    var out_dir = try std.fs.cwd().makeOpenPath("test", .{});
    defer out_dir.close();
    try pipeToFileSystem(out_dir, f.reader(), .{ .allocator = std.testing.allocator });
}