wyag

Write yourself a git
Log | Files | Refs | README

commit 982ed6b3b799c0617e6d212c4c24549ccade33c8
parent edfc9d54897c7455eb32e1bada8639d8a3929f8b
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Mon, 19 Aug 2024 22:20:49 +0100

Implement basic git log using Graphviz

Slight restructure; add a GitObject structure for parsing the basic git
object wrapper and supplying a reader() which can be used to parse the
data contained therein.

Diffstat:
Msrc/root.zig | 159++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 126 insertions(+), 33 deletions(-)

diff --git a/src/root.zig b/src/root.zig @@ -29,6 +29,11 @@ pub fn doMain() !void { var hash_object_input: argparse.Positional = .{ .name = "file", .description = "The file to create a blob from" }; try hash_object.addPositional(&hash_object_input); + var log: argparse.Subcommand = .{ .parent = &ap, .description = "Log commits", .name = "log" }; + try ap.addSubcommand(&log); + var log_commit: argparse.Positional = .{ .name = "commit", .description = "The commit to log from" }; + try log.addPositional(&log_commit); + if (!try ap.parseOrHelp()) { return; } @@ -59,6 +64,12 @@ pub fn doMain() !void { std.log.err("No file supplied to hash-object", .{}); return error.InvalidArgs; } + } else if (log.wasExecuted) { + if (log_commit.value) |ref| { + try gitLog(a, std.fs.cwd(), ref, std.io.getStdOut().writer()); + } else { + std.log.err("No commit supplied to log", .{}); + } } else { if (ap.excess.items.len > 0) { std.log.err("Unsupported sub-command {s}, have you tried implementing it yourself?", .{ap.excess.items[0]}); @@ -69,6 +80,46 @@ pub fn doMain() !void { } } +fn gitLog(a: std.mem.Allocator, dir: Dir, ref: []const u8, writer: anytype) !void { + var repo = try repo_find(a, dir); + defer repo.deinit(); + var seen = std.AutoHashMap([20]u8, void).init(a); + defer seen.deinit(); + var sha: [20]u8 = undefined; + _ = try std.fmt.hexToBytes(&sha, ref); + try writer.writeAll("digraph wyaglog{\n"); + try writer.writeAll(" node[shape=rect]\n"); + try logGraphvis(a, repo, sha, &seen, writer.any()); + try writer.writeAll("}"); +} + +fn logGraphvis(a: std.mem.Allocator, repo: GitRepository, sha: [20]u8, seen: *std.AutoHashMap([20]u8, void), writer: std.io.AnyWriter) !void { + const gpr = try seen.getOrPut(sha); + if (gpr.found_existing) return; // short circuit we've already seen it + + var go = try repo.read_object_sha(a, sha); + defer go.deinit(); + if (go.kind != .commit) return error.NotACommit; + var commit = try Commit.parse(a, go.reader()); + defer commit.deinit(a); + + 
const ref = std.fmt.bytesToHex(sha, .lower); + var spl = std.mem.splitScalar(u8, commit.message, '\n'); + const short_msg_bare = std.mem.trim(u8, spl.first(), " "); + + const short_msg_1 = try std.mem.replaceOwned(u8, a, short_msg_bare, "\\", "\\\\"); + defer a.free(short_msg_1); + const short_msg_2 = try std.mem.replaceOwned(u8, a, short_msg_1, "\"", "\\\""); + defer a.free(short_msg_2); + try std.fmt.format(writer, " c_{s} [label=\"{s}: {s}\"]\n", .{ ref, ref[0..7], short_msg_2 }); + for (commit.parents.items) |parent_ref| { + try std.fmt.format(writer, " c_{s} -> c_{s};\n", .{ ref, parent_ref }); + var child_sha: [20]u8 = undefined; + _ = try std.fmt.hexToBytes(&child_sha, parent_ref); + try logGraphvis(a, repo, child_sha, seen, writer); + } +} + fn repoCreate(ca: std.mem.Allocator, path: []const u8) !GitRepository { var worktree: Dir = undefined; { @@ -103,7 +154,18 @@ fn catFile(a: std.mem.Allocator, cwd: std.fs.Dir, kind_str: []const u8, ref: []c const kind = try enumFromString(kind_str, ObjectKind); var repo = try repo_find(a, cwd); defer repo.deinit(); - try repo.read_object(a, kind, ref, writer); + var git_object = try repo.read_object(a, ref); + defer git_object.deinit(); + if (git_object.kind != kind) { + return error.WrongKind; + } + var rdr = git_object.reader(); + var buf = [_]u8{0} ** std.mem.page_size; + while (true) { + const sz = try rdr.read(&buf); + if (sz == 0) break; + try writer.writeAll(buf[0..sz]); + } } fn hashObject(a: std.mem.Allocator, cwd: std.fs.Dir, file: []const u8, write: bool, writer: std.io.AnyWriter) !void { @@ -118,6 +180,46 @@ fn hashObject(a: std.mem.Allocator, cwd: std.fs.Dir, file: []const u8, write: bo try writer.writeByte('\n'); } +pub const GitObject = struct { + const ReadError = std.compress.zlib.Decompressor(std.fs.File.Reader).Error || error{ TooLong, InvalidHash }; + + kind: ObjectKind, + len: u64, + sha1: [20]u8, + + _file: std.fs.File, + _decomp: std.compress.zlib.Decompressor(std.fs.File.Reader), + _sha1: 
std.crypto.hash.Sha1, + _read: u64, + + pub fn read(self: *GitObject, buffer: []u8) ReadError!usize { + const sz = try self._decomp.read(buffer); + if (self._read + sz > self.len) { + return error.TooLong; + } + self._read += sz; + if (sz > 0) { + self._sha1.update(buffer[0..sz]); + } else { + const final = self._sha1.finalResult(); + if (!std.mem.eql(u8, &self.sha1, &final)) { + return error.InvalidHash; + } + } + return sz; + } + + pub fn reader(self: *GitObject) std.io.Reader(*GitObject, ReadError, GitObject.read) { + return .{ + .context = self, + }; + } + + pub fn deinit(self: *GitObject) void { + self._file.close(); + } +}; + pub const GitRepository = struct { worktree: Dir, gitdir: Dir, @@ -148,15 +250,16 @@ pub const GitRepository = struct { self._aa.deinit(); } - // sha is in hex-lower format - // raw uncompressed contents will be written to writer. + pub fn read_object_sha(self: GitRepository, a: std.mem.Allocator, sha: [20]u8) !GitObject { + var ref = std.fmt.bytesToHex(sha, .lower); + return self.read_object(a, &ref); + } + pub fn read_object( self: GitRepository, a: std.mem.Allocator, - expected_kind: ObjectKind, ref: []const u8, - writer: std.io.AnyWriter, - ) !void { + ) !GitObject { if (ref.len != 40) { std.log.err("Invalid ref in read_object {s}", .{ref}); return error.InvalidArgs; @@ -164,42 +267,32 @@ pub const GitRepository = struct { const path = try std.fs.path.join(a, &.{ "objects", ref[0..2], ref[2..] 
}); defer a.free(path); var file = try self.gitdir.openFile(path, .{}); - defer file.close(); - var buffer = std.ArrayList(u8).init(a); - defer buffer.deinit(); - const buffer_writer = buffer.writer(); + errdefer file.close(); const file_reader = file.reader(); var decomp = std.compress.zlib.decompressor(file_reader); const decomp_reader = decomp.reader(); var sha1 = std.crypto.hash.Sha1.init(.{}); - var hashed = std.compress.hashedReader(decomp_reader, &sha1); - var hashed_reader = hashed.reader(); - try hashed_reader.streamUntilDelimiter(buffer_writer, '\x00', null); - var spl = std.mem.splitScalar(u8, buffer.items, ' '); + const head = try decomp_reader.readUntilDelimiterAlloc(a, '\x00', 1024); + defer a.free(head); + sha1.update(head); + sha1.update(&.{'\x00'}); + var spl = std.mem.splitScalar(u8, head, ' '); const kind_str = spl.first(); const len_str = spl.rest(); const kind = try enumFromString(kind_str, ObjectKind); - if (kind != expected_kind) { - return error.WrongKind; - } const len = try std.fmt.parseInt(u64, len_str, 10); - - var limited = std.io.limitedReader(hashed_reader, len); - var limited_reader = limited.reader(); - - var buf = [_]u8{0} ** std.mem.page_size; - while (true) { - const sz = try limited_reader.read(&buf); - if (sz == 0) break; - try writer.writeAll(buf[0..sz]); - } - const ref_cmp = try std.fmt.allocPrint(a, "{s}", .{std.fmt.fmtSliceHexLower(&sha1.finalResult())}); - defer a.free(ref_cmp); - if (!std.mem.eql(u8, ref, ref_cmp)) { - std.log.err("got {s} expected {s}", .{ ref, ref_cmp }); - return error.InvalidHash; - } + var expected_sha1: [20]u8 = undefined; + _ = try std.fmt.hexToBytes(&expected_sha1, ref); + return .{ + .kind = kind, + .len = len, + .sha1 = expected_sha1, + ._file = file, + ._decomp = decomp, + ._sha1 = sha1, + ._read = 0, + }; } // Caller owns the response