const std = @import("std");

// ZIP file implementation
// See spec.txt.

const CompressionMethod = enum(u16) {
    store = 0,
    shrink = 1,
    reduce_1 = 2,
    reduce_2 = 3,
    reduce_3 = 4,
    reduce_4 = 5,
    implode = 6,
    reserved_1 = 7,
    deflate = 8,
    deflate64 = 9,
    pkware_implode = 10,
    reserved_2 = 11,
    bzip2 = 12,
    reserved_3 = 13,
    lzma = 14,
    reserved_4 = 15,
    ibm_zos_zmpsc = 16,
    reserved_5 = 17,
    ibm_terse = 18,
    ibm_lz77_z = 19,
    zstd_deprecated = 20,
    zstd = 93,
    mp3 = 94,
    xz = 95,
    jpeg = 96,
    wavpack = 97,
    ppmd_version_i_rev1 = 98,
    aex_encryption_marker = 99,
};

// Overall archive layout:
//
// [local file header 1]
// [encryption header 1]
// [file data 1]
// [data descriptor 1]
// .
// .
// .
// [local file header n]
// [encryption header n]
// [file data n]
// [data descriptor n]
// [archive decryption header]
// [archive extra data record]
// [central directory header 1]
// .
// .
// .
// [central directory header n]
// [zip64 end of central directory record]
// [zip64 end of central directory locator]
// [end of central directory record]

allocator: std.mem.Allocator,
end_of_central_directory_record: EndOfCentralDirectoryRecord,
central_directory_headers: []CentralDirectoryHeader,

fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !@This() {
    // Find the EndOfCentralDirectoryRecord. It must be within the last
    // 22 + 65535 bytes of the file: a fixed 22-byte record followed by a
    // comment of at most 65535 bytes.
    const eocdr_search_width_max: usize = 22 + 65535;
    const epos = try file_or_stream.getEndPos();
    const eocdr_search_width: usize = @min(epos, eocdr_search_width_max);
    const eocdr_seek_start: usize = epos - eocdr_search_width;
    try file_or_stream.seekTo(eocdr_seek_start);

    var reader = file_or_stream.reader();

    // Slide a four-byte window over the stream until it equals the record
    // signature. The window shifts bytes in most-significant-first, so it is
    // primed with a big-endian read and compared against the byte-swapped SIG.
    const needle = @byteSwap(EndOfCentralDirectoryRecord.SIG);
    var window: u32 = try reader.readIntBig(u32);
    while (true) {
        if (window == needle) {
            try file_or_stream.seekBy(-4);
            break;
        }
        const nb = reader.readByte() catch return error.EndOfCentralDirectoryRecordNotFound;
        window <<= 8;
        window |= nb;
    }

    var eocdr = try EndOfCentralDirectoryRecord.read(allocator, file_or_stream);
    errdefer eocdr.deinit(allocator);

    if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0) return error.SpansNotSupported;
    if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk) return error.SpansNotSupported;

    var central_directory_headers = try allocator.alloc(CentralDirectoryHeader, eocdr.total_central_dir_entries);
    errdefer allocator.free(central_directory_headers);

    try file_or_stream.seekTo(eocdr.central_dir_offset);
    for (0..eocdr.total_central_dir_entries) |i| {
        central_directory_headers[i] = try CentralDirectoryHeader.read(allocator, file_or_stream);
    }

    return .{
        .allocator = allocator,
        .end_of_central_directory_record = eocdr,
        .central_directory_headers = central_directory_headers,
    };
}

fn deinit(self: *@This()) void {
    self.end_of_central_directory_record.deinit(self.allocator);
    for (0..self.central_directory_headers.len) |i| {
        self.central_directory_headers[i].deinit(self.allocator);
    }
    self.allocator.free(self.central_directory_headers);
}

fn count_files(self: @This()) u16 {
    return self.end_of_central_directory_record.total_central_dir_entries;
}

fn file_name(self: @This(), index: u16) []const u8 {
    return self.central_directory_headers[index].file_name;
}

fn file_comment(self: @This(), index: u16) []const u8 {
    return self.central_directory_headers[index].file_comment;
}
fn extract(self: @This(), index: u16, stream_or_file_in: anytype, stream_or_file_out: anytype) !void {
    const cdh = self.central_directory_headers[index];
    try stream_or_file_in.seekTo(cdh.relative_offset_of_local_header);

    var lfh = try LocalFileHeader.read(self.allocator, stream_or_file_in);
    defer lfh.deinit(self.allocator);

    const is_encrypted = lfh.general_purpose_bit_flag.isSet(0);
    if (is_encrypted) return error.EncryptionNotSupported;

    var reader = stream_or_file_in.reader();
    var lr = std.io.limitedReader(reader, lfh.compressed_size);
    var limited_reader = lr.reader();

    switch (lfh.compression_method) {
        .store => {
            var writer = stream_or_file_out.writer();
            try pump(limited_reader, writer, lfh.uncompressed_size, lfh.crc32);
        },
        .deflate => {
            var decomp = try std.compress.deflate.decompressor(self.allocator, limited_reader, null);
            defer decomp.deinit();
            var decomp_reader = decomp.reader();
            var writer = stream_or_file_out.writer();
            try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
        },
        .lzma => {
            var decomp = try std.compress.lzma.decompress(self.allocator, limited_reader);
            defer decomp.deinit();
            var decomp_reader = decomp.reader();
            var writer = stream_or_file_out.writer();
            try pump(decomp_reader, writer, lfh.uncompressed_size, lfh.crc32);
        },
        else => {
            std.log.err("compression method {} not supported", .{lfh.compression_method});
            return error.CompressionMethodNotSupported;
        },
    }
}

fn pump(reader: anytype, writer: anytype, expected_size_written: usize, expected_crc32: u32) !void {
    var buf = [_]u8{0} ** 1024;
    var crc32 = std.hash.Crc32.init();
    var written: usize = 0;
    while (true) {
        const read = try reader.read(&buf);
        if (read == 0) break;
        const write = buf[0..read];
        try writer.writeAll(write);
        crc32.update(write);
        written += read;
    }
    if (written != expected_size_written) return error.WrongUncompressedSize;
    if (crc32.final() != expected_crc32) return error.WrongChecksum;
}

const CentralDirectoryHeader = struct {
    const SIG: u32 = @as(u32, 0x02014b50);

    version_made_by: u16,
    version_needed_to_extract: u16,
    general_purpose_bit_flag: u16,
    compression_method: CompressionMethod,
    last_mod_file_time: u16,
    last_mod_file_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_comment_length: u16,
    disk_number_start: u16,
    internal_file_attributes: u16,
    external_file_attributes: u32,
    relative_offset_of_local_header: u32,
    file_name: []const u8,
    extra_field: []const u8,
    file_comment: []const u8,

    fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !CentralDirectoryHeader {
        return read2(allocator, stream_or_file, CentralDirectoryHeader, &[_]Dynamic{
            .{ .field_name = "file_name", .length_field_name = "file_name_length" },
            .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
            .{ .field_name = "file_comment", .length_field_name = "file_comment_length" },
        });
    }

    fn deinit(self: *CentralDirectoryHeader, allocator: std.mem.Allocator) void {
        allocator.free(self.file_name);
        allocator.free(self.extra_field);
        allocator.free(self.file_comment);
    }
};

const EndOfCentralDirectoryRecord = struct {
    const SIG: u32 = @as(u32, 0x06054b50);

    disk_number_this: u16,
    disk_number_central_dir_start: u16,
    total_central_dir_entries_on_this_disk: u16,
    total_central_dir_entries: u16,
    size_of_central_dir: u32,
    central_dir_offset: u32,
    comment_length: u16,
    comment: []const u8,

    fn read(allocator: std.mem.Allocator, file_or_stream: anytype) !EndOfCentralDirectoryRecord {
        return read2(allocator, file_or_stream, EndOfCentralDirectoryRecord, &[_]Dynamic{
            .{ .field_name = "comment", .length_field_name = "comment_length" },
        });
    }
    fn deinit(self: *EndOfCentralDirectoryRecord, allocator: std.mem.Allocator) void {
        allocator.free(self.comment);
    }
};

const LocalFileHeader = struct {
    const SIG: u32 = @as(u32, 0x04034b50);

    version_needed_to_extract: u16,
    general_purpose_bit_flag: std.bit_set.IntegerBitSet(16),
    compression_method: CompressionMethod,
    last_mod_file_time: u16,
    last_mod_file_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_name: []const u8,
    extra_field: []const u8,

    fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !LocalFileHeader {
        return read2(allocator, stream_or_file, LocalFileHeader, &[_]Dynamic{
            .{ .field_name = "file_name", .length_field_name = "file_name_length" },
            .{ .field_name = "extra_field", .length_field_name = "extra_field_length" },
        });
    }

    fn deinit(self: *LocalFileHeader, allocator: std.mem.Allocator) void {
        allocator.free(self.file_name);
        allocator.free(self.extra_field);
    }
};

// Marks a variable-length field and the fixed-size field that holds its length.
const Dynamic = struct {
    field_name: []const u8,
    length_field_name: []const u8,
};

// Reads a little-endian header struct T from the stream, checking T.SIG first.
// Fixed-size integer, packed-struct, and enum fields are read in declaration
// order; fields listed in `dynamics` are allocated and filled using the length
// field read earlier in the same record.
fn read2(
    allocator: std.mem.Allocator,
    stream_or_file: anytype,
    comptime T: type,
    comptime dynamics: []const Dynamic,
) !T {
    if (!@hasDecl(T, "SIG")) @compileError("Expected decl SIG:u32 on type " ++ @typeName(T));
    const ti = @typeInfo(T);
    if (ti != .Struct) @compileError("read2 expects type parameter T to be a struct, but it was a " ++ @typeName(T));
    const si = ti.Struct;

    var reader = stream_or_file.reader();

    const sig_actual = try reader.readIntLittle(u32);
    if (sig_actual != T.SIG) {
        std.log.err("invalid signature expected {x} got {x}", .{ T.SIG, sig_actual });
        return error.InvalidSignature;
    }

    var t: T = undefined;
    inline for (si.fields) |field| {
        const fti = @typeInfo(field.type);
        dynamic: inline for (dynamics) |dyn| {
            if (comptime std.mem.eql(u8, dyn.field_name, field.name)) {
                if (fti != .Pointer) @compileError("field " ++ field.name ++ " is marked dynamic but isn't a pointer. Instead it's a " ++ @typeName(field.type));
                const pi = fti.Pointer;
                if (pi.size != .Slice) @compileError("field " ++ field.name ++ " is marked dynamic, but isn't a slice, instead it's sized " ++ @tagName(pi.size));
                const len = @field(t, dyn.length_field_name);
                var buf = try allocator.alloc(pi.child, len);
                // TODO how to errdefer in a loop, not sure where the scope ends.
                _ = try reader.readAll(buf);
                @field(t, field.name) = buf;
                break :dynamic;
            }
        } else {
            switch (fti) {
                .Int => {
                    @field(t, field.name) = try reader.readIntLittle(field.type);
                },
                .Struct => |fsi| {
                    if (fsi.backing_integer) |bi| {
                        const int = try reader.readIntLittle(bi);
                        @field(t, field.name) = @bitCast(int);
                    } else @compileError("only packed structs with a backing integer are supported; field " ++ field.name ++ " of type " ++ @typeName(field.type) ++ " is not such a struct");
                },
                .Enum => |fei| {
                    if (@typeInfo(fei.tag_type) == .Int) {
                        const int = try reader.readIntLittle(fei.tag_type);
                        @field(t, field.name) = @enumFromInt(int);
                    } else @compileError("only enums with an integer tag type are supported; field " ++ field.name ++ " of type " ++ @typeName(field.type) ++ " is not such an enum");
                },
                else => @compileError("don't know how to handle field " ++ field.name ++ " of type " ++ @tagName(fti)),
            }
        }
    }
    return t;
}

test "open stream" {
    const test_zip = @embedFile("hello.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();

    try std.testing.expectEqual(zf.count_files(), 2);
    try std.testing.expectEqualStrings(zf.file_name(0), "hello.txt");
    try std.testing.expectEqualStrings(zf.file_name(1), "foo.txt");
}

test "open file" {
    const test_zip = try std.fs.cwd().openFile("src/hello.zip", .{});
    defer test_zip.close();

    var zf = try @This().from(std.testing.allocator, &test_zip);
    defer zf.deinit();

    try std.testing.expectEqual(zf.count_files(), 2);
    try std.testing.expectEqualStrings(zf.file_name(0), "hello.txt");
    try std.testing.expectEqualStrings(zf.file_name(1), "foo.txt");
}

test "extract stored" {
    const test_zip = @embedFile("hello.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();

    var out = [_]u8{0} ** 1024;
    var fbs_out = std.io.fixedBufferStream(&out);

    try zf.extract(0, &fbs, &fbs_out);
    try std.testing.expectEqualStrings("Hello, Zip!", fbs_out.getWritten());

    fbs_out.reset();
    try zf.extract(1, &fbs, &fbs_out);
    try std.testing.expectEqualStrings("hi there\n", fbs_out.getWritten());
}

test "extract deflate" {
    const test_zip = @embedFile("deflate.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try @This().from(std.testing.allocator, &fbs);
    defer zf.deinit();

    var out = [_]u8{0} ** 1024;
    var fbs_out = std.io.fixedBufferStream(&out);

    try std.testing.expectEqualStrings("Here is a comment :)", zf.file_comment(0));
    try std.testing.expectEqual(@This().CompressionMethod.deflate, zf.central_directory_headers[0].compression_method);

    try zf.extract(0, &fbs, &fbs_out);
    try std.testing.expectEqualStrings(@embedFile("foo.txt"), fbs_out.getWritten());
}
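
// Usage sketch (assumptions, not part of the module): if this file were saved
// as `zip.zig` with its declarations made `pub`, and `archive.zip` were a real
// archive on disk, extracting the first entry could look roughly like this:
//
//     const ZipFile = @import("zip.zig");
//
//     var in = try std.fs.cwd().openFile("archive.zip", .{});
//     defer in.close();
//     var zip = try ZipFile.from(allocator, &in);
//     defer zip.deinit();
//
//     var out = try std.fs.cwd().createFile(zip.file_name(0), .{});
//     defer out.close();
//     try zip.extract(0, &in, &out);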