const std = @import("std");

// ZIP file implementation
// See spec.txt.

/// Parsed view of a ZIP archive's central directory.
///
/// Owns the end-of-central-directory record and every central directory
/// header read from the archive; release them with `deinit`.
/// Multi-disk (spanned) archives are rejected. ZIP64 is not handled yet.
pub const ZipFile = struct {
    allocator: std.mem.Allocator,
    is_zip_64: bool = false,
    end_of_central_directory_record: EndOfCentralDirectoryRecord,
    central_directory_headers: []CentralDirectoryHeader,

    /// Reads the end-of-central-directory record and all central directory
    /// headers from `file_or_stream`.
    ///
    /// `file_or_stream` must provide `getEndPos`, `seekTo` and `reader`
    /// (for example `*std.io.FixedBufferStream` or `std.fs.File`).
    ///
    /// Errors:
    /// - `error.EndOfCentralDirectoryRecordNotFound` if no plausible record
    ///   exists in the tail of the input.
    /// - `error.SpansNotSupported` for multi-disk archives.
    /// - `error.InvalidSignature` / `error.EndOfStream` for malformed or
    ///   truncated central directories.
    /// - Any allocation or I/O error from the allocator or the stream.
    ///
    /// On error nothing is leaked. On success the caller owns the result and
    /// must call `deinit`.
    pub fn from(allocator: std.mem.Allocator, file_or_stream: anytype) !ZipFile {
        const eocdr_pos = try findEndOfCentralDirectoryRecord(allocator, file_or_stream);
        try file_or_stream.seekTo(eocdr_pos);

        // `var` because `deinit` takes a mutable pointer.
        var eocdr = try EndOfCentralDirectoryRecord.read(allocator, file_or_stream);
        errdefer eocdr.deinit();
        if (eocdr.disk_number_this != 0 or eocdr.disk_number_central_dir_start != 0) return error.SpansNotSupported;
        if (eocdr.total_central_dir_entries != eocdr.total_central_dir_entries_on_this_disk) return error.SpansNotSupported;

        const central_directory_headers = try allocator.alloc(CentralDirectoryHeader, eocdr.total_central_dir_entries);
        // Number of initialised entries; only those may be deinitialised if a
        // later header fails to parse.
        var headers_read: usize = 0;
        errdefer {
            for (central_directory_headers[0..headers_read]) |*header| header.deinit();
            allocator.free(central_directory_headers);
        }

        try file_or_stream.seekTo(eocdr.central_dir_offset);
        while (headers_read < central_directory_headers.len) : (headers_read += 1) {
            central_directory_headers[headers_read] = try CentralDirectoryHeader.read(allocator, file_or_stream);
        }

        return ZipFile{
            .allocator = allocator,
            .end_of_central_directory_record = eocdr,
            .central_directory_headers = central_directory_headers,
        };
    }

    /// Frees everything acquired by `from`.
    pub fn deinit(self: *ZipFile) void {
        self.end_of_central_directory_record.deinit();
        for (self.central_directory_headers) |*header| header.deinit();
        self.allocator.free(self.central_directory_headers);
    }

    /// Returns the absolute offset of the end-of-central-directory record.
    ///
    /// The record is 22 fixed bytes plus a comment of at most 65535 bytes, so
    /// it must start within the last `EndOfCentralDirectoryRecord.max_size`
    /// bytes. The tail is scanned backwards so that signature bytes occurring
    /// in file data are not preferred over the real record, and a candidate is
    /// only accepted if its fixed part and declared comment fit in the input.
    fn findEndOfCentralDirectoryRecord(allocator: std.mem.Allocator, file_or_stream: anytype) !u64 {
        const end_pos: u64 = try file_or_stream.getEndPos();
        const tail_len: usize = @intCast(@min(end_pos, EndOfCentralDirectoryRecord.max_size));
        const tail_start: u64 = end_pos - tail_len;

        const tail = try allocator.alloc(u8, tail_len);
        defer allocator.free(tail);
        try file_or_stream.seekTo(tail_start);
        const reader = file_or_stream.reader();
        try reader.readNoEof(tail);

        // EndOfCentralDirectoryRecord.SIG as it appears on disk (little-endian).
        const signature = [4]u8{ 0x50, 0x4b, 0x05, 0x06 };
        var search_end: usize = tail.len;
        while (std.mem.lastIndexOf(u8, tail[0..search_end], &signature)) |candidate| {
            if (tail.len - candidate >= EndOfCentralDirectoryRecord.fixed_size) {
                // The comment length is the last fixed field, at byte offset 20.
                const comment_length = std.mem.readIntLittle(u16, tail[candidate + 20 ..][0..2]);
                if (candidate + EndOfCentralDirectoryRecord.fixed_size + comment_length <= tail.len) {
                    return tail_start + candidate;
                }
            }
            // Continue with occurrences that start strictly before this one.
            search_end = candidate + signature.len - 1;
        }
        return error.EndOfCentralDirectoryRecordNotFound;
    }

    // Overall archive layout (APPNOTE 4.3.6):
    // [local file header 1]
    // [encryption header 1]
    // [file data 1]
    // [data descriptor 1]
    // .
    // .
    // .
    // [local file header n]
    // [encryption header n]
    // [file data n]
    // [data descriptor n]
    // [archive decryption header]
    // [archive extra data record]
    // [central directory header 1]
    // .
    // .
    // .
    // [central directory header n]
    // [zip64 end of central directory record]
    // [zip64 end of central directory locator]
    // [end of central directory record]
};

// const LocalFileHeader = struct {
//     const GPBF = packed struct(u16) {
//         encrypted: bool = false,
//     };
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x04034b50));
//     sig: [4]u8 = SIG,
//     // version needed to extract       2 bytes
//     general_purpose_bit_flag: GPBF,
//     // compression method              2 bytes
//     // last mod file time              2 bytes
//     // last mod file date              2 bytes
//     // crc-32                          4 bytes
//     // compressed size                 4 bytes
//     // uncompressed size               4 bytes
//     // file name length                2 bytes
//     // extra field length              2 bytes
//     // file name (variable size)
//     // extra field (variable size)
// };

// const DataDescriptor = struct {
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x08074b50));
//     sig: [4]u8 = SIG,
//     // crc-32                          4 bytes
//     // compressed size                 4 bytes
//     // uncompressed size               4 bytes
// };

// const ArchiveExtraDataRecord = struct {
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x08064b50));
//     sig: [4]u8 = SIG,
//     // extra field length              4 bytes
//     // extra field data                (variable size)

// };

/// One central directory file header (signature 0x02014b50) together with its
/// variable-length file name, extra field and file comment.
///
/// The three slices are owned by this value and freed by `deinit`.
pub const CentralDirectoryHeader = struct {
    pub const SIG: u32 = @as(u32, 0x02014b50);
    allocator: std.mem.Allocator,
    version_made_by: u16,
    version_needed_to_extract: u16,
    general_purpose_bit_flag: u16,
    compression_method: u16,
    last_mod_file_time: u16,
    last_mod_file_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_comment_length: u16,
    disk_number_start: u16,
    internal_file_attributes: u16,
    external_file_attributes: u32,
    relative_offset_of_local_header: u32,
    file_name: []const u8,
    extra_field: []const u8,
    file_comment: []const u8,

    /// Reads one header from the current position of `stream_or_file`.
    ///
    /// Returns `error.InvalidSignature` if the next four bytes are not `SIG`
    /// and `error.EndOfStream` if the input ends before the header (including
    /// its variable-length parts) is complete. Nothing is leaked on error.
    pub fn read(allocator: std.mem.Allocator, stream_or_file: anytype) !CentralDirectoryHeader {
        const reader = stream_or_file.reader();
        const sig = try reader.readIntLittle(u32);
        if (sig != CentralDirectoryHeader.SIG) {
            std.log.err("invalid signature expected {x} got {x}", .{ CentralDirectoryHeader.SIG, sig });
            return error.InvalidSignature;
        }
        const version_made_by = try reader.readIntLittle(u16);
        const version_needed_to_extract = try reader.readIntLittle(u16);
        const general_purpose_bit_flag = try reader.readIntLittle(u16);
        const compression_method = try reader.readIntLittle(u16);
        const last_mod_file_time = try reader.readIntLittle(u16);
        const last_mod_file_date = try reader.readIntLittle(u16);
        const crc32 = try reader.readIntLittle(u32);
        const compressed_size = try reader.readIntLittle(u32);
        const uncompressed_size = try reader.readIntLittle(u32);
        const file_name_length = try reader.readIntLittle(u16);
        const extra_field_length = try reader.readIntLittle(u16);
        const file_comment_length = try reader.readIntLittle(u16);
        const disk_number_start = try reader.readIntLittle(u16);
        const internal_file_attributes = try reader.readIntLittle(u16);
        const external_file_attributes = try reader.readIntLittle(u32);
        const relative_offset_of_local_header = try reader.readIntLittle(u32);

        // readNoEof fails on a short read instead of silently leaving part of
        // the buffer uninitialised.
        const file_name = try allocator.alloc(u8, file_name_length);
        errdefer allocator.free(file_name);
        try reader.readNoEof(file_name);

        const extra_field = try allocator.alloc(u8, extra_field_length);
        errdefer allocator.free(extra_field);
        try reader.readNoEof(extra_field);

        const file_comment = try allocator.alloc(u8, file_comment_length);
        errdefer allocator.free(file_comment);
        try reader.readNoEof(file_comment);

        return CentralDirectoryHeader{
            .allocator = allocator,
            .version_made_by = version_made_by,
            .version_needed_to_extract = version_needed_to_extract,
            .general_purpose_bit_flag = general_purpose_bit_flag,
            .compression_method = compression_method,
            .last_mod_file_time = last_mod_file_time,
            .last_mod_file_date = last_mod_file_date,
            .crc32 = crc32,
            .compressed_size = compressed_size,
            .uncompressed_size = uncompressed_size,
            .file_name_length = file_name_length,
            .extra_field_length = extra_field_length,
            .file_comment_length = file_comment_length,
            .disk_number_start = disk_number_start,
            .internal_file_attributes = internal_file_attributes,
            .external_file_attributes = external_file_attributes,
            .relative_offset_of_local_header = relative_offset_of_local_header,
            .file_name = file_name,
            .extra_field = extra_field,
            .file_comment = file_comment,
        };
    }

    /// Frees the file name, extra field and file comment.
    pub fn deinit(self: *CentralDirectoryHeader) void {
        self.allocator.free(self.file_name);
        self.allocator.free(self.extra_field);
        self.allocator.free(self.file_comment);
    }
};

// const DigitalSignature = struct {
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x05054b50));
//     sig: [4]u8 = SIG,
//     // size of data                    2 bytes
//     // signature data (variable size)
// };

// const Zip64EndOfCentralDirectoryRecord = struct {
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x06064b50));
//     sig: [4]u8 = SIG,
//     // size of zip64 end of central
//     // directory record                8 bytes
//     // version made by                 2 bytes
//     // version needed to extract       2 bytes
//     // number of this disk             4 bytes
//     // number of the disk with the
//     // start of the central directory  4 bytes
//     // total number of entries in the
//     // central directory on this disk  8 bytes
//     // total number of entries in the
//     // central directory               8 bytes
//     // size of the central directory   8 bytes
//     // offset of start of central
//     // directory with respect to
//     // the starting disk number        8 bytes
//     // zip64 extensible data sector    (variable size)
// };

// const Zip64EndOfCentralDirectoryLocator = struct {
//     const SIG: [4]u8 = @bitCast(@as(u32, 0x07064b50));
//     sig: [4]u8 = SIG,
//     // number of the disk with the
//     // start of the zip64 end of
//     // central directory               4 bytes
//     // relative offset of the zip64
//     // end of central directory record 8 bytes
//     // total number of disks           4 bytes
// };

/// The end-of-central-directory record (signature 0x06054b50) and its
/// trailing archive comment.
///
/// `comment` is owned by this value and freed by `deinit`.
pub const EndOfCentralDirectoryRecord = struct {
    pub const SIG: u32 = @as(u32, 0x06054b50);
    /// Size in bytes of the record without its comment:
    /// signature (4) + five u16 fields (10) + two u32 fields (8).
    pub const fixed_size = 22;
    /// Largest possible record: fixed part plus a maximal u16-length comment.
    pub const max_size = fixed_size + std.math.maxInt(u16);

    allocator: std.mem.Allocator,
    disk_number_this: u16,
    disk_number_central_dir_start: u16,
    total_central_dir_entries_on_this_disk: u16,
    total_central_dir_entries: u16,
    size_of_central_dir: u32,
    central_dir_offset: u32,
    comment_length: u16,
    comment: []const u8,

    /// Reads the record from the current position of `file_or_stream`.
    ///
    /// Returns `error.InvalidSignature` if the next four bytes are not `SIG`
    /// and `error.EndOfStream` if the input ends before the record (including
    /// its comment) is complete. Nothing is leaked on error.
    pub fn read(allocator: std.mem.Allocator, file_or_stream: anytype) !EndOfCentralDirectoryRecord {
        const reader = file_or_stream.reader();
        const sig = try reader.readIntLittle(u32);
        if (sig != EndOfCentralDirectoryRecord.SIG) {
            std.log.err("invalid signature expected {x} got {x}", .{ EndOfCentralDirectoryRecord.SIG, sig });
            return error.InvalidSignature;
        }
        const disk_number_this = try reader.readIntLittle(u16);
        const disk_number_central_dir_start = try reader.readIntLittle(u16);
        const total_central_dir_entries_on_this_disk = try reader.readIntLittle(u16);
        const total_central_dir_entries = try reader.readIntLittle(u16);
        const size_of_central_dir = try reader.readIntLittle(u32);
        const central_dir_offset = try reader.readIntLittle(u32);
        const comment_length = try reader.readIntLittle(u16);

        const comment = try allocator.alloc(u8, comment_length);
        errdefer allocator.free(comment);
        try reader.readNoEof(comment);

        return EndOfCentralDirectoryRecord{
            .allocator = allocator,
            .disk_number_this = disk_number_this,
            .disk_number_central_dir_start = disk_number_central_dir_start,
            .total_central_dir_entries_on_this_disk = total_central_dir_entries_on_this_disk,
            .total_central_dir_entries = total_central_dir_entries,
            .size_of_central_dir = size_of_central_dir,
            .central_dir_offset = central_dir_offset,
            .comment_length = comment_length,
            .comment = comment,
        };
    }

    /// Frees the archive comment.
    pub fn deinit(self: *EndOfCentralDirectoryRecord) void {
        self.allocator.free(self.comment);
    }
};

test "foo" {
    const test_zip = @embedFile("hello.zip");
    var fbs = std.io.fixedBufferStream(test_zip);
    var zf = try ZipFile.from(std.testing.allocator, &fbs);
    defer zf.deinit();
    // std.testing helpers take (expected, actual).
    try std.testing.expectEqual(@as(usize, 2), zf.central_directory_headers.len);
    try std.testing.expectEqualStrings("hello.txt", zf.central_directory_headers[0].file_name);
    try std.testing.expectEqualStrings("foo.txt", zf.central_directory_headers[1].file_name);
}