wyag

Write yourself a git
Log | Files | Refs | README

commit 2682bea8663f8fb818209ef82d122af8e3db3312
parent 033414702664ab80f973b9304a6329fc6efe9d40
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Sun, 18 Aug 2024 21:18:44 +0100

Implement commit parsing

More small rearrangement

Bit more code rearrangement

Diffstat:
M src/root.zig | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 204 insertions(+), 52 deletions(-)

diff --git a/src/root.zig b/src/root.zig @@ -3,16 +3,8 @@ const argparse = @import("argparse.zig"); const IniFile = @import("inifile.zig"); const Dir = std.fs.Dir; -const ObjectKind = enum { - commit, - tree, - tag, - blob, -}; - -var prng = std.rand.DefaultPrng.init(123); - pub fn doMain() !void { + prng = std.rand.DefaultPrng.init(@intCast(std.time.timestamp())); var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const a = gpa.allocator(); @@ -25,7 +17,9 @@ pub fn doMain() !void { var cat_file: argparse.Subcommand = .{ .parent = &ap, .description = "Read the contents of an object", .name = "cat-file" }; try ap.addSubcommand(&cat_file); - var cat_file_ref: argparse.Positional = .{ .name = "ref", .description = "The object reference to read" }; + var cat_file_kind: argparse.Positional = .{ .name = "type", .description = "The type of the object to be read" }; + try cat_file.addPositional(&cat_file_kind); + var cat_file_ref: argparse.Positional = .{ .name = "object", .description = "The object reference to read" }; try cat_file.addPositional(&cat_file_ref); var hash_object: argparse.Subcommand = .{ .parent = &ap, .description = "Create a blob from a file", .name = "hash-object" }; @@ -40,7 +34,7 @@ pub fn doMain() !void { } if (init.wasExecuted) { if (init_path.value) |path| { - var repo = try repo_create(a, path); + var repo = try repoCreate(a, path); defer repo.deinit(); } else { std.log.err("No 'path' provided to init", .{}); @@ -48,7 +42,12 @@ pub fn doMain() !void { } } else if (cat_file.wasExecuted) { if (cat_file_ref.value) |ref| { - try catFile(a, std.fs.cwd(), ref, std.io.getStdOut().writer().any()); + if (cat_file_kind.value) |kind_str| { + try catFile(a, std.fs.cwd(), kind_str, ref, std.io.getStdOut().writer().any()); + } else { + std.log.err("No type provided to cat-file", .{}); + return error.InvalidArgs; + } } else { std.log.err("No ref provided to cat-file", .{}); return error.InvalidArgs; @@ -70,10 +69,41 @@ pub fn doMain() 
!void { } } -fn catFile(a: std.mem.Allocator, cwd: std.fs.Dir, ref: []const u8, writer: std.io.AnyWriter) !void { +fn repoCreate(ca: std.mem.Allocator, path: []const u8) !GitRepository { + var worktree: Dir = undefined; + { + var aa = std.heap.ArenaAllocator.init(ca); + defer aa.deinit(); + const a = aa.allocator(); + const cwd = std.fs.cwd(); + worktree = try cwd.makeOpenPath(path, .{}); + errdefer worktree.close(); + var gitdir = try worktree.makeOpenPath(".git", .{}); + defer gitdir.close(); + try gitdir.makePath("branches"); + try gitdir.makePath("objects"); + try gitdir.makePath(try std.fs.path.join(a, &.{ "refs", "tags" })); + try gitdir.makePath(try std.fs.path.join(a, &.{ "refs", "heads" })); + var headfile = try gitdir.createFile("HEAD", .{}); + defer headfile.close(); + try headfile.writeAll("ref: refs/heads/main\n"); + var configfile = try gitdir.createFile("config", .{}); + defer configfile.close(); + try configfile.writeAll( + \\[core] + \\ repositoryformatversion = 0 + \\ filemode = false + \\ bare = false + ); + } + return try GitRepository.init(ca, worktree); +} + +fn catFile(a: std.mem.Allocator, cwd: std.fs.Dir, kind_str: []const u8, ref: []const u8, writer: std.io.AnyWriter) !void { + const kind = try enumFromString(kind_str, ObjectKind); var repo = try repo_find(a, cwd); defer repo.deinit(); - _ = try repo.read_object(a, ref, writer); + try repo.read_object(a, kind, ref, writer); } fn hashObject(a: std.mem.Allocator, cwd: std.fs.Dir, file: []const u8, write: bool, writer: std.io.AnyWriter) !void { @@ -120,7 +150,13 @@ pub const GitRepository = struct { // sha is in hex-lower format // raw uncompressed contents will be written to writer. 
- pub fn read_object(self: GitRepository, a: std.mem.Allocator, ref: []const u8, writer: std.io.AnyWriter) !ObjectKind { + pub fn read_object( + self: GitRepository, + a: std.mem.Allocator, + expected_kind: ObjectKind, + ref: []const u8, + writer: std.io.AnyWriter, + ) !void { if (ref.len != 40) { std.log.err("Invalid ref in read_object {s}", .{ref}); return error.InvalidArgs; @@ -144,6 +180,9 @@ pub const GitRepository = struct { const kind_str = spl.first(); const len_str = spl.rest(); const kind = try enumFromString(kind_str, ObjectKind); + if (kind != expected_kind) { + return error.WrongKind; + } const len = try std.fmt.parseInt(u64, len_str, 10); var limited = std.io.limitedReader(hashed_reader, len); @@ -161,7 +200,6 @@ pub const GitRepository = struct { std.log.err("got {s} expected {s}", .{ ref, ref_cmp }); return error.InvalidHash; } - return kind; } // Caller owns the response @@ -211,36 +249,6 @@ test "init repo" { defer gr.deinit(); } -fn repo_create(ca: std.mem.Allocator, path: []const u8) !GitRepository { - var worktree: Dir = undefined; - { - var aa = std.heap.ArenaAllocator.init(ca); - defer aa.deinit(); - const a = aa.allocator(); - const cwd = std.fs.cwd(); - worktree = try cwd.makeOpenPath(path, .{}); - errdefer worktree.close(); - var gitdir = try worktree.makeOpenPath(".git", .{}); - defer gitdir.close(); - try gitdir.makePath("branches"); - try gitdir.makePath("objects"); - try gitdir.makePath(try std.fs.path.join(a, &.{ "refs", "tags" })); - try gitdir.makePath(try std.fs.path.join(a, &.{ "refs", "heads" })); - var headfile = try gitdir.createFile("HEAD", .{}); - defer headfile.close(); - try headfile.writeAll("ref: refs/heads/main\n"); - var configfile = try gitdir.createFile("config", .{}); - defer configfile.close(); - try configfile.writeAll( - \\[core] - \\ repositoryformatversion = 0 - \\ filemode = false - \\ bare = false - ); - } - return try GitRepository.init(ca, worktree); -} - // takes ownership of "dir", the variable should not 
be used // by any other code after calling this function. fn repo_find(a: std.mem.Allocator, dir: Dir) !GitRepository { @@ -263,18 +271,18 @@ fn repo_find(a: std.mem.Allocator, dir: Dir) !GitRepository { } } +// test "repo_find" { +// const srcdir = try std.fs.cwd().openDir("src/foo/bar/baz", .{ .iterate = true }); +// var gr = try repo_find(std.testing.allocator, srcdir); +// defer gr.deinit(); +// } + fn safeclose(dir: *Dir) void { if (std.fs.cwd().fd != dir.fd) { dir.close(); } } -test "repo_find" { - const srcdir = try std.fs.cwd().openDir("src/foo/bar/baz", .{ .iterate = true }); - var gr = try repo_find(std.testing.allocator, srcdir); - defer gr.deinit(); -} - fn enumFromString(str: []const u8, enum_type: type) !enum_type { const ti = @typeInfo(enum_type); inline for (ti.Enum.fields) |field| { @@ -285,3 +293,147 @@ fn enumFromString(str: []const u8, enum_type: type) !enum_type { return error.InvalidEnum; } } + +var prng: std.rand.DefaultPrng = undefined; + +const ObjectKind = enum { + commit, + tree, + tag, + blob, +}; + +const Commit = struct { + const max_parents = 4; + tree: []const u8, + parents: std.ArrayList([]const u8), + author: []const u8, + committer: ?[]const u8, + gpgsig: ?[]const u8, + message: []const u8, + + pub fn parse(a: std.mem.Allocator, z_reader: anytype) !Commit { + var cr = std.io.countingReader(z_reader); + var reader = cr.reader(); + + var al = std.ArrayList(u8).init(a); + defer al.deinit(); + + var tree: ?[]const u8 = null; + var parents = std.ArrayList([]const u8).init(a); + var author: ?[]const u8 = null; + var committer: ?[]const u8 = null; + var gpgsig: ?[]const u8 = null; + var message: ?[]const u8 = null; + + errdefer { + if (tree) |t| a.free(t); + for (parents.items) |parent| a.free(parent); + parents.deinit(); + if (author) |t| a.free(t); + if (committer) |t| a.free(t); + if (gpgsig) |t| a.free(t); + if (message) |t| a.free(t); + } + + headers: while (true) { + const b = try reader.readByte(); + switch (b) { + '\n' => { + 
const b2 = try reader.readByte(); + if (b2 == ' ') { + try al.append(b); + continue :headers; + } + + var spl = std.mem.splitScalar(u8, al.items, ' '); + const key = spl.first(); + const val = spl.rest(); + if (std.mem.eql(u8, key, "tree")) { + tree = try a.dupe(u8, val); + } else if (std.mem.eql(u8, key, "parent")) { + try parents.append(try a.dupe(u8, val)); + } else if (std.mem.eql(u8, key, "author")) { + author = try a.dupe(u8, val); + } else if (std.mem.eql(u8, key, "committer")) { + committer = try a.dupe(u8, val); + } else if (std.mem.eql(u8, key, "gpgsig")) { + gpgsig = try a.dupe(u8, val); + } + al.clearRetainingCapacity(); + if (b2 == '\n') { + break :headers; + } else { + try al.append(b2); + } + }, + else => { + try al.append(b); + }, + } + } + + // And the message is everything else + try reader.readAllArrayList(&al, 1_000_000); + message = try al.toOwnedSlice(); + + return .{ + .tree = tree orelse { + std.log.err("no tree", .{}); + return error.InvalidCommit; + }, + .parents = parents, + .author = author orelse { + std.log.err("no author", .{}); + return error.InvalidCommit; + }, + .committer = committer, + .gpgsig = gpgsig, + .message = message orelse { + std.log.err("no message", .{}); + return error.InvalidCommit; + }, + }; + } + + pub fn deinit(self: *Commit, a: std.mem.Allocator) void { + a.free(self.tree); + for (self.parents.items) |parent| a.free(parent); + self.parents.deinit(); + a.free(self.author); + if (self.committer) |committer| a.free(committer); + if (self.gpgsig) |gpgsig| a.free(gpgsig); + a.free(self.message); + } +}; + +test "parse commit" { + const commit_str = + \\tree 29ff16c9c14e2652b22f8b78bb08a5a07930c147 + \\parent 206941306e8a8af65b66eaaaea388a7ae24d49a0 + \\author Thibault Polge <thibault@thb.lt> 1527025023 +0200 + \\committer Thibault Polge <thibault@thb.lt> 1527025044 +0200 + \\gpgsig -----BEGIN PGP SIGNATURE----- + \\ + \\ iQIzBAABCAAdFiEExwXquOM8bWb4Q2zVGxM2FxoLkGQFAlsEjZQACgkQGxM2FxoL + \\ 
kGQdcBAAqPP+ln4nGDd2gETXjvOpOxLzIMEw4A9gU6CzWzm+oB8mEIKyaH0UFIPh + \\ rNUZ1j7/ZGFNeBDtT55LPdPIQw4KKlcf6kC8MPWP3qSu3xHqx12C5zyai2duFZUU + \\ wqOt9iCFCscFQYqKs3xsHI+ncQb+PGjVZA8+jPw7nrPIkeSXQV2aZb1E68wa2YIL + \\ 3eYgTUKz34cB6tAq9YwHnZpyPx8UJCZGkshpJmgtZ3mCbtQaO17LoihnqPn4UOMr + \\ V75R/7FjSuPLS8NaZF4wfi52btXMSxO/u7GuoJkzJscP3p4qtwe6Rl9dc1XC8P7k + \\ NIbGZ5Yg5cEPcfmhgXFOhQZkD0yxcJqBUcoFpnp2vu5XJl2E5I/quIyVxUXi6O6c + \\ /obspcvace4wy8uO0bdVhc4nJ+Rla4InVSJaUaBeiHTW8kReSFYyMmDCzLjGIu1q + \\ doU61OM3Zv1ptsLu3gUE6GU27iWYj2RWN3e3HE4Sbd89IFwLXNdSuM0ifDLZk7AQ + \\ WBhRhipCCgZhkj9g2NEk7jRVslti1NdN5zoQLaJNqSwO1MtxTmJ15Ksk3QP6kfLB + \\ Q52UWybBzpaP9HEd4XnR+HuQ4k2K0ns2KgNImsNvIyFwbpMUyUWLMPimaV1DWUXo + \\ 5SBjDB/V/W2JBFR+XKHFJeFwYhj7DD/ocsGr4ZMx/lgc8rjIBkI= + \\ =lgTX + \\ -----END PGP SIGNATURE----- + \\ + \\Create first draft + ; + var fbs = std.io.fixedBufferStream(commit_str); + const rdr = fbs.reader(); + var commit = try Commit.parse(std.testing.allocator, rdr); + defer commit.deinit(std.testing.allocator); +}