wyag

Write yourself a git
Log | Files | Refs | README

commit bde0623549f145c4227232a82a60c0db3032d296
parent 929cfb179ecc9f7fe6c577181b158b6b9c63caf3
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Wed, 14 Aug 2024 23:02:37 +0100

Add cat-file and hash-object subcommands

Diffstat:
M src/root.zig | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 149 insertions(+), 3 deletions(-)

diff --git a/src/root.zig b/src/root.zig
@@ -3,6 +3,17 @@ const argparse = @import("argparse.zig");
 const IniFile = @import("inifile.zig");
 const Dir = std.fs.Dir;
 
+/// Kind of a git object, spelled exactly as it appears in the object header.
+const ObjectKind = enum {
+    commit,
+    tree,
+    tag,
+    blob,
+};
+
+// Constant seed: write_object only uses this to pick temporary file names.
+var prng = std.rand.DefaultPrng.init(123);
+
 pub fn doMain() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     defer _ = gpa.deinit();
@@ -14,6 +25,18 @@ pub fn doMain() !void {
     var init_path: argparse.Positional = .{ .name = "path", .description = "The directory in which to make a git repository" };
     try init.addPositional(&init_path);
 
+    var cat_file: argparse.Subcommand = .{ .parent = &ap, .description = "Read the contents of an object", .name = "cat-file" };
+    try ap.addSubcommand(&cat_file);
+    var cat_file_ref: argparse.Positional = .{ .name = "ref", .description = "The object reference to read" };
+    try cat_file.addPositional(&cat_file_ref);
+
+    var hash_object: argparse.Subcommand = .{ .parent = &ap, .description = "Create a blob from a file", .name = "hash-object" };
+    try ap.addSubcommand(&hash_object);
+    var hash_object_write: argparse.Flag = .{ .long = "write", .short = "w", .description = "Actually write the file to the repository", .hasarg = false };
+    try hash_object.addFlag(&hash_object_write);
+    var hash_object_input: argparse.Positional = .{ .name = "file", .description = "The file to create a blob from" };
+    try hash_object.addPositional(&hash_object_input);
+
     if (!try ap.parseOrHelp()) {
         return;
     }
@@ -25,6 +48,33 @@ pub fn doMain() !void {
             std.log.err("No 'path' provided to init", .{});
             return error.InvalidArgs;
         }
+    } else if (cat_file.wasExecuted) {
+        if (cat_file_ref.value) |ref| {
+            var repo = try repo_find(a, std.fs.cwd());
+            defer repo.deinit();
+            const kind = try repo.read_object(a, ref, std.io.getStdOut().writer().any());
+            std.log.info("Object kind: {s}", .{@tagName(kind)});
+        } else {
+            std.log.err("No ref provided to cat-file", .{});
+            return error.InvalidArgs;
+        }
+    } else if (hash_object.wasExecuted) {
+        if (hash_object_input.value) |input| {
+            var repo = try repo_find(a, std.fs.cwd());
+            defer repo.deinit();
+            const cwd = std.fs.cwd();
+            const stat = try cwd.statFile(input);
+            var infile = try cwd.openFile(input, .{});
+            defer infile.close();
+            const sha = try repo.write_object(a, stat.size, infile.reader(), .blob, hash_object_write.waspresent);
+            defer a.free(sha);
+            const stdout = std.io.getStdOut().writer();
+            try stdout.writeAll(sha);
+            try stdout.writeByte('\n');
+        } else {
+            std.log.err("No file supplied to hash-object", .{});
+            return error.InvalidArgs;
+        }
     } else {
         if (ap.excess.items.len > 0) {
             std.log.err("Unsupported sub-command {s}, have you tried implementing it yourself?", .{ap.excess.items[0]});
@@ -64,6 +114,98 @@ pub const GitRepository = struct {
         safeclose(&self.worktree);
         self._aa.deinit();
     }
+
+    /// Read the object named by `ref` (40 lower-case hex characters) from objects/ under the git dir.
+    /// The decompressed payload, without its "<kind> <len>\0" header, is streamed to `writer`.
+    /// Returns the header's kind; error.InvalidHash (after the payload is written) if it does not hash to `ref`.
+    pub fn read_object(self: GitRepository, a: std.mem.Allocator, ref: []const u8, writer: std.io.AnyWriter) !ObjectKind {
+        if (ref.len != 40) {
+            std.log.err("Invalid ref in read_object {s}", .{ref});
+            return error.InvalidArgs;
+        }
+        const path = try std.fs.path.join(a, &.{ "objects", ref[0..2], ref[2..] });
+        defer a.free(path);
+        var file = try self.gitdir.openFile(path, .{});
+        defer file.close();
+        var buffer = std.ArrayList(u8).init(a);
+        defer buffer.deinit();
+        const buffer_writer = buffer.writer();
+        const file_reader = file.reader();
+        var decomp = std.compress.zlib.decompressor(file_reader);
+        const decomp_reader = decomp.reader();
+        var sha1 = std.crypto.hash.Sha1.init(.{});
+        var hashed = std.compress.hashedReader(decomp_reader, &sha1);
+        var hashed_reader = hashed.reader();
+
+        try hashed_reader.streamUntilDelimiter(buffer_writer, '\x00', null);
+        var spl = std.mem.splitScalar(u8, buffer.items, ' ');
+        const kind_str = spl.first();
+        const len_str = spl.rest();
+        const kind = try enumFromString(kind_str, ObjectKind);
+        const len = try std.fmt.parseInt(u64, len_str, 10);
+
+        var limited = std.io.limitedReader(hashed_reader, len);
+        var limited_reader = limited.reader();
+
+        var buf = [_]u8{0} ** std.mem.page_size;
+        while (true) {
+            const sz = try limited_reader.read(&buf);
+            if (sz == 0) break;
+            try writer.writeAll(buf[0..sz]);
+        }
+        const ref_cmp = try std.fmt.allocPrint(a, "{s}", .{std.fmt.fmtSliceHexLower(&sha1.finalResult())});
+        defer a.free(ref_cmp);
+        if (!std.mem.eql(u8, ref, ref_cmp)) {
+            std.log.err("got {s} expected {s}", .{ ref_cmp, ref });
+            return error.InvalidHash;
+        }
+        return kind;
+    }
+
+    /// Compress and hash `len` bytes from `reader` as an object of the given kind. When `write` is true
+    /// the result is also stored under objects/. Caller owns the returned lower-case hex hash.
+    pub fn write_object(self: GitRepository, ca: std.mem.Allocator, len: u64, reader: anytype, kind: ObjectKind, write: bool) ![]const u8 {
+        var aa = std.heap.ArenaAllocator.init(ca); // 3 lines saves many little 'free' calls through this function.
+        defer aa.deinit();
+        const a = aa.allocator();
+        // Write the stuff to a temporary file and calculate the hash
+        try self.gitdir.makePath("tmp");
+        var sha1 = std.crypto.hash.Sha1.init(.{});
+        const tmpfilename = try std.fmt.allocPrint(a, "wyag-{}", .{prng.random().int(u16)});
+        const tmpfilepath = try std.fs.path.join(a, &.{ "tmp", tmpfilename });
+        defer self.gitdir.deleteFile(tmpfilepath) catch {}; // not much we can do about this.
+        {
+            var tmpfile = try self.gitdir.createFile(tmpfilepath, .{});
+            defer tmpfile.close();
+            const tmpfile_writer = tmpfile.writer();
+            var comp = try std.compress.zlib.compressor(tmpfile_writer, .{});
+            const comp_writer = comp.writer();
+            var comp_and_hash = std.compress.hashedWriter(comp_writer, &sha1);
+            const comp_and_hash_writer = comp_and_hash.writer();
+
+            try std.fmt.format(comp_and_hash_writer, "{s} {}\x00", .{ @tagName(kind), len }); // header
+            var limited = std.io.limitedReader(reader, len);
+            var limited_reader = limited.reader();
+            var buf = [_]u8{0} ** std.mem.page_size;
+            while (true) {
+                const sz = try limited_reader.read(&buf);
+                if (sz == 0) break;
+                try comp_and_hash_writer.writeAll(buf[0..sz]);
+            }
+            try comp.finish();
+        }
+
+        const ref = try std.fmt.allocPrint(a, "{s}", .{std.fmt.fmtSliceHexLower(&sha1.finalResult())});
+        // now rename it into place once you have the hash
+        if (write) {
+            const path = try std.fs.path.join(a, &.{ "objects", ref[0..2], ref[2..] });
+            // The objects/<first two hex digits> directory may not exist yet, and rename will not create it.
+            if (std.fs.path.dirname(path)) |fanout| try self.gitdir.makePath(fanout);
+            std.log.info("renaming {s} to {s}", .{ tmpfilepath, path });
+            try self.gitdir.rename(tmpfilepath, path);
+        }
+        return try ca.dupe(u8, ref);
+    }
 };
 
 test "init repo" {
@@ -107,7 +249,7 @@ fn repo_find(a: std.mem.Allocator, dir: Dir) !GitRepository {
     const stat = dir.statFile(".git") catch |e| switch (e) {
         error.FileNotFound => {
             // try the parent
-            var parentdir = dir.openDir("..", .{.iterate = true}) catch |e2| switch (e2) {
+            var parentdir = dir.openDir("..", .{ .iterate = true }) catch |e2| switch (e2) {
                 error.FileNotFound => return error.NoGitDirFound,
                 else => return e2,
             };
@@ -126,12 +268,16 @@ fn repo_find(a: std.mem.Allocator, dir: Dir) !GitRepository {
 fn safeclose(dir: *Dir) void {
     if (std.fs.cwd().fd != dir.fd) {
         dir.close();
-   }
+    }
 }
 
 test "repo_find" {
-    const srcdir = try std.fs.cwd().openDir("src/foo/bar/baz", .{.iterate = true});
+    const srcdir = try std.fs.cwd().openDir("src/foo/bar/baz", .{ .iterate = true });
     var gr = try repo_find(std.testing.allocator, srcdir);
     defer gr.deinit();
 }
 
+/// Return the tag of `enum_type` whose name is exactly `str`, or error.InvalidEnum if none matches.
+fn enumFromString(str: []const u8, enum_type: type) !enum_type {
+    return std.meta.stringToEnum(enum_type, str) orelse error.InvalidEnum;
+}