zipdl

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit a28a7c8db679d7a787be035035773abe8dadfff3
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Sat,  7 Oct 2023 21:43:33 +0100

Initial.
Implementation of seekable HTTP range, which will be used for reading
ZIP over http without downloading the whole thing.

Diffstat:
A.gitignore | 2++
AREADME.md | 12++++++++++++
Abuild.zig | 39+++++++++++++++++++++++++++++++++++++++
Abuild.zig.zon | 12++++++++++++
Asrc/main.zig | 7+++++++
Asrc/seekable_http_range.zig | 243+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 315 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +zig-cache/ +zig-out/ diff --git a/README.md b/README.md @@ -0,0 +1,11 @@ +zipdl +===== + +Command line tool for downloading files out of a remote ZIP file from a web server that supports HTTP Range requests. + +Usage: +``` +$ zipdl URL FILE... +URL is the remote ZIP file to be accessed. +FILE is the filepath within the ZIP file to download. +``` +\ No newline at end of file diff --git a/build.zig b/build.zig @@ -0,0 +1,39 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "zipdl", + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + const zip = b.dependency("zip", .{ .target = target, .optimize = optimize }); + exe.addModule("zip", zip.module("zip")); + + b.installArtifact(exe); + + const run_cmd = b.addRunArtifact(exe); + run_cmd.step.dependOn(b.getInstallStep()); + + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + unit_tests.addModule("zip", zip.module("zip")); + + const run_unit_tests = b.addRunArtifact(unit_tests); + + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); +} diff --git a/build.zig.zon b/build.zig.zon @@ -0,0 +1,11 @@ +.{ + .name = "zipdl", + .version = "0.0.1", + .dependencies = .{ + .zip = .{ + .name = "zip", + .url = "https://code.mfashby.net/zip-zig/snapshot/zip-zig-2f3063fe4f20a318d5571e81a1db46f215ca8c6d.tar.xz", + .hash = "122036bad45a84f4c4c5c6bf4ba1f51422b1ceeae42d38a52acb53b1ec5c07838eea", + }, + }, +} +\ No newline at end of file diff --git a/src/main.zig b/src/main.zig @@ -0,0 +1,6 @@ +const std = @import("std"); +const SeekableHttpRange = @import("seekable_http_range.zig"); + +test { + _ = SeekableHttpRange; +} +\ No newline at end of file diff --git a/src/seekable_http_range.zig b/src/seekable_http_range.zig @@ -0,0 +1,243 @@ +//! Reader/SeekableStream implementation of a resource over HTTP using Range requests. + +const std = @import("std"); + +const SeekableHttpRange = @This(); + +pub const Opts = struct { + allocator: std.mem.Allocator, + client: *std.http.Client, + url: []const u8, + buffer_size: usize = 1024, +}; + +allocator: std.mem.Allocator, +client: *std.http.Client, +url: []const u8, +pos: u64 = 0, +endpos: u64, +buffer_pos: ?u64 = null, +buffer: []u8, + +pub fn init(opts: Opts) !SeekableHttpRange { + var a = opts.allocator; + var client = opts.client; + var url = opts.url; + var res = try client.fetch(a, .{ + .method = .HEAD, + .location = .{ .url = url }, + .headers = std.http.Headers{ .allocator = a, .owned = false }, + }); + defer res.deinit(); + if (res.status != .ok) return error.HttpStatusError; + const accept_header_val = res.headers.getFirstValue("accept-ranges") orelse return error.HttpRangeNotSpecified; + if (std.mem.eql(u8, accept_header_val, "none")) return error.HttpRangeNotSupported; + if (!std.mem.eql(u8, accept_header_val, "bytes")) return error.HttpRangeUnsupportedUnit; + const content_length_val = res.headers.getFirstValue("content-length") orelse return error.NoContentLength; + const content_length = std.fmt.parseInt(u64, content_length_val, 10) catch return error.ContentLengthFormatError; + var buffer = try a.alloc(u8, opts.buffer_size); + return .{ + .allocator = opts.allocator, + .client = client, + .url = url, + .buffer = buffer, + .endpos = content_length, + }; +} + +pub fn deinit(self: *SeekableHttpRange) void { + self.allocator.free(self.buffer); +} + +// it's a big list :( +pub const ReadError = error{ + UnsupportedHeader, + UnexpectedCharacter, + InvalidFormat, + InvalidPort, + OutOfMemory, + ConnectionRefused, + NetworkUnreachable, + ConnectionTimedOut, + ConnectionResetByPeer, + TemporaryNameServerFailure, + NameServerFailure, + UnknownHostName, + HostLacksNetworkAddresses, + UnexpectedConnectFailure, + TlsInitializationFailed, + UnsupportedUrlScheme, + UnexpectedWriteFailure, + InvalidContentLength, + UnsupportedTransferEncoding, + Overflow, + InvalidCharacter, + UriMissingHost, + CertificateBundleLoadFailure, + TlsFailure, + TlsAlert, + UnexpectedReadFailure, + EndOfStream, + HttpChunkInvalid, + SystemResources, + FileLocksNotSupported, + Unexpected, + AccessDenied, + NotWriteable, + MessageTooLong, + Unseekable, + InputOutput, + IsDir, + OperationAborted, + BrokenPipe, + NotOpenForReading, + NetNameDeleted, + WouldBlock, + MessageNotCompleted, + HttpHeadersExceededSizeLimit, + HttpHeadersInvalid, + HttpHeaderContinuationsUnsupported, + HttpTransferEncodingUnsupported, + HttpConnectionHeaderUnsupported, + CompressionNotSupported, + TooManyHttpRedirects, + RedirectRequiresResend, + HttpRedirectMissingLocation, + CompressionInitializationFailed, + DecompressionFailure, + InvalidTrailers, + StreamTooLong, + DiskQuota, + FileTooBig, + NoSpaceLeft, + DeviceBusy, + InvalidArgument, + NotOpenForWriting, + LockViolation, + HttpStatusError, + HttpNoBody, +}; + +pub const Reader = std.io.Reader(*SeekableHttpRange, ReadError, read); + +pub fn reader(self: *SeekableHttpRange) Reader { + return .{ + .context = self, + }; +} + +pub fn read(self: *SeekableHttpRange, buffer: []u8) ReadError!usize { + var n: usize = 0; + for (0..buffer.len) |ix| { + const b = try readByte(self); + const bb = b orelse break; + buffer[ix] = bb; + n += 1; + } + return n; +} + +fn readByte(self: *SeekableHttpRange) !?u8 { + if (self.pos >= self.endpos) return null; + + if (self.buffer_pos) |buffer_pos| { + const buffer_end: u64 = buffer_pos + self.buffer.len; + if (self.pos >= buffer_pos and self.pos < buffer_end) { + return self.readFromBuffer(); + } + } + + // refill the buffer from pos + // max u64 formatted as decimal is 20 bytes long + const range_buf_len = "bytes=-".len + 20 + 20; + var range_buf = [_]u8{0} ** range_buf_len; + const nbuf_end = @min(self.pos + self.buffer.len, self.endpos); + // Range request end is _inclusive_ + var range_value = std.fmt.bufPrint(&range_buf, "bytes={}-{}", .{ self.pos, nbuf_end - 1 }) catch unreachable; + var headers = std.http.Headers{ .allocator = self.allocator }; + defer headers.deinit(); + try headers.append("range", range_value); + var res = try self.client.fetch(self.allocator, .{ + .location = .{ .url = self.url }, + .headers = headers, + }); + defer res.deinit(); + if (res.status != .partial_content) return error.HttpStatusError; + const body = res.body orelse return error.HttpNoBody; + std.mem.copyForwards(u8, self.buffer, body); + self.buffer_pos = self.pos; + return self.readFromBuffer(); +} + +fn readFromBuffer(self: *SeekableHttpRange) u8 { + const pos_in_buffer = self.pos - self.buffer_pos.?; + defer self.pos += 1; + return self.buffer[pos_in_buffer]; +} + +pub const SeekError = error{}; +pub const GetSeekPosError = error{}; +pub const SeekableStream = std.io.SeekableStream(*SeekableHttpRange, SeekError, GetSeekPosError, seekTo, seekBy, getPos, getEndPos); + +pub fn seekableStream(self: *SeekableHttpRange) SeekableStream { + return .{ + .context = self, + }; +} + +// copying from FixedBufferStream: clamp rather than return an error +pub fn seekTo(self: *SeekableHttpRange, pos: u64) SeekError!void { + self.pos = std.math.clamp(pos, 0, self.endpos); +} + +// copying from FixedBufferStream: clamp rather than return an error +pub fn seekBy(self: *SeekableHttpRange, delta: i64) SeekError!void { + const np: u64 = if (std.math.sign(delta) == -1) + std.math.sub(u64, self.pos, std.math.absCast(delta)) catch 0 + else + std.math.add(u64, self.pos, std.math.absCast(delta)) catch std.math.maxInt(u64); + self.pos = std.math.clamp(np, 0, self.endpos); +} + +pub fn getPos(self: *SeekableHttpRange) GetSeekPosError!u64 { + return self.pos; +} + +pub fn getEndPos(self: *SeekableHttpRange) GetSeekPosError!u64 { + return self.endpos; +} + +test "endBytes" { + const a = std.testing.allocator; + var client = std.http.Client{ .allocator = a }; + defer client.deinit(); + var range = try SeekableHttpRange.init(.{ .allocator = a, .client = &client, .url = "https://mfashby.net/posts.zip" }); + defer range.deinit(); + var ss = range.seekableStream(); + var rr = range.reader(); + + var buf = try a.alloc(u8, 20); + defer a.free(buf); + + try ss.seekTo(try ss.getEndPos() - 20); + try rr.readNoEof(buf); + try std.testing.expectEqualSlices(u8, &[_]u8{ 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x31, 0x00, 0x31, 0x00, 0xFD, 0x11, 0x00, 0x00, 0xEE, 0xB7, 0x00, 0x00, 0x00, 0x00 }, buf); + + try ss.seekBy(-300); + try rr.readNoEof(buf); + try std.testing.expectEqualSlices(u8, &[_]u8{ 0x00, 0x00, 0x00, 0x08, 0x00, 0xA1, 0x3A, 0x17, 0x57, 0x85, 0x9F, 0x53, 0xCE, 0x26, 0x05, 0x00, 0x00, 0x37, 0x0A, 0x00 }, buf); + + try ss.seekTo(0); + try ss.seekBy(-1); + try std.testing.expectEqual(@as(u64, 0), try ss.getPos()); + + try ss.seekBy(std.math.minInt(i64)); + try std.testing.expectEqual(@as(u64, 0), try ss.getPos()); + + try ss.seekTo(try ss.getEndPos()); + try ss.seekBy(1); + try std.testing.expectEqual(try ss.getEndPos(), try ss.getPos()); + + try ss.seekBy(std.math.maxInt(i64)); + try std.testing.expectEqual(try ss.getEndPos(), try ss.getPos()); +}