wyag

Write yourself a git
Log | Files | Refs | README

commit edfc9d54897c7455eb32e1bada8639d8a3929f8b
parent 2682bea8663f8fb818209ef82d122af8e3db3312
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Sun, 18 Aug 2024 22:02:59 +0100

Extract the key-value-log-message (Kvlm) parsing code from the structure
which uses it (the git commit)

It's clearly a little less efficient; maybe we could do some comptime
magic to make it efficient at runtime.

Diffstat:
M build.zig | 1 +
M src/root.zig | 180 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
2 files changed, 96 insertions(+), 85 deletions(-)

diff --git a/build.zig b/build.zig @@ -38,6 +38,7 @@ pub fn build(b: *std.Build) void { }); const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); + b.installArtifact(lib_unit_tests); const exe_unit_tests = b.addTest(.{ .root_source_file = b.path("src/main.zig"), diff --git a/src/root.zig b/src/root.zig @@ -304,65 +304,115 @@ const ObjectKind = enum { }; const Commit = struct { - const max_parents = 4; + _kvlm: Kvlm, tree: []const u8, - parents: std.ArrayList([]const u8), + parents: std.ArrayListUnmanaged([]const u8), author: []const u8, committer: ?[]const u8, gpgsig: ?[]const u8, message: []const u8, pub fn parse(a: std.mem.Allocator, z_reader: anytype) !Commit { + var kvlm = try Kvlm.parse(a, z_reader); + errdefer kvlm.deinit(a); + return .{ + ._kvlm = kvlm, + .tree = if (kvlm.headers.get("tree")) |tree| tree.items[0] else return error.InvalidCommit, + .parents = kvlm.headers.get("parent") orelse .{}, + .author = if (kvlm.headers.get("author")) |tree| tree.items[0] else return error.InvalidCommit, + .committer = if (kvlm.headers.get("committer")) |tree| tree.items[0] else null, + .gpgsig = if (kvlm.headers.get("gpgsig")) |tree| tree.items[0] else null, + .message = kvlm.message, + }; + } + + pub fn deinit(self: *Commit, a: std.mem.Allocator) void { + self._kvlm.deinit(a); + } +}; + +test "parse commit" { + const commit_str = + \\tree 29ff16c9c14e2652b22f8b78bb08a5a07930c147 + \\parent 206941306e8a8af65b66eaaaea388a7ae24d49a0 + \\author Thibault Polge <thibault@thb.lt> 1527025023 +0200 + \\committer Thibault Polge <thibault@thb.lt> 1527025044 +0200 + \\gpgsig -----BEGIN PGP SIGNATURE----- + \\ + \\ iQIzBAABCAAdFiEExwXquOM8bWb4Q2zVGxM2FxoLkGQFAlsEjZQACgkQGxM2FxoL + \\ kGQdcBAAqPP+ln4nGDd2gETXjvOpOxLzIMEw4A9gU6CzWzm+oB8mEIKyaH0UFIPh + \\ rNUZ1j7/ZGFNeBDtT55LPdPIQw4KKlcf6kC8MPWP3qSu3xHqx12C5zyai2duFZUU + \\ wqOt9iCFCscFQYqKs3xsHI+ncQb+PGjVZA8+jPw7nrPIkeSXQV2aZb1E68wa2YIL + \\ 3eYgTUKz34cB6tAq9YwHnZpyPx8UJCZGkshpJmgtZ3mCbtQaO17LoihnqPn4UOMr + \\ 
V75R/7FjSuPLS8NaZF4wfi52btXMSxO/u7GuoJkzJscP3p4qtwe6Rl9dc1XC8P7k + \\ NIbGZ5Yg5cEPcfmhgXFOhQZkD0yxcJqBUcoFpnp2vu5XJl2E5I/quIyVxUXi6O6c + \\ /obspcvace4wy8uO0bdVhc4nJ+Rla4InVSJaUaBeiHTW8kReSFYyMmDCzLjGIu1q + \\ doU61OM3Zv1ptsLu3gUE6GU27iWYj2RWN3e3HE4Sbd89IFwLXNdSuM0ifDLZk7AQ + \\ WBhRhipCCgZhkj9g2NEk7jRVslti1NdN5zoQLaJNqSwO1MtxTmJ15Ksk3QP6kfLB + \\ Q52UWybBzpaP9HEd4XnR+HuQ4k2K0ns2KgNImsNvIyFwbpMUyUWLMPimaV1DWUXo + \\ 5SBjDB/V/W2JBFR+XKHFJeFwYhj7DD/ocsGr4ZMx/lgc8rjIBkI= + \\ =lgTX + \\ -----END PGP SIGNATURE----- + \\ + \\Create first draft + ; + var fbs = std.io.fixedBufferStream(commit_str); + const rdr = fbs.reader(); + var commit = try Commit.parse(std.testing.allocator, rdr); + defer commit.deinit(std.testing.allocator); +} + +const Kvlm = struct { + headers: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged([]const u8)), + message: []const u8, + + pub fn parse(a: std.mem.Allocator, z_reader: anytype) !Kvlm { var cr = std.io.countingReader(z_reader); var reader = cr.reader(); var al = std.ArrayList(u8).init(a); defer al.deinit(); - var tree: ?[]const u8 = null; - var parents = std.ArrayList([]const u8).init(a); - var author: ?[]const u8 = null; - var committer: ?[]const u8 = null; - var gpgsig: ?[]const u8 = null; - var message: ?[]const u8 = null; - + var headers = std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged([]const u8)){}; errdefer { - if (tree) |t| a.free(t); - for (parents.items) |parent| a.free(parent); - parents.deinit(); - if (author) |t| a.free(t); - if (committer) |t| a.free(t); - if (gpgsig) |t| a.free(t); - if (message) |t| a.free(t); + var it = headers.iterator(); + while (it.next()) |i| { + a.free(i.key_ptr.*); + for (i.value_ptr.*.items) |ii| { + a.free(ii); + } + i.value_ptr.*.deinit(a); + } + headers.deinit(a); } - headers: while (true) { + headers_loop: while (true) { const b = try reader.readByte(); switch (b) { '\n' => { const b2 = try reader.readByte(); if (b2 == ' ') { try al.append(b); - continue :headers; + continue 
:headers_loop; } var spl = std.mem.splitScalar(u8, al.items, ' '); - const key = spl.first(); - const val = spl.rest(); - if (std.mem.eql(u8, key, "tree")) { - tree = try a.dupe(u8, val); - } else if (std.mem.eql(u8, key, "parent")) { - try parents.append(try a.dupe(u8, val)); - } else if (std.mem.eql(u8, key, "author")) { - author = try a.dupe(u8, val); - } else if (std.mem.eql(u8, key, "committer")) { - committer = try a.dupe(u8, val); - } else if (std.mem.eql(u8, key, "gpgsig")) { - gpgsig = try a.dupe(u8, val); + const key = try a.dupe(u8, spl.first()); + errdefer a.free(key); + const val = try a.dupe(u8, spl.rest()); + errdefer a.free(val); + var gpr = try headers.getOrPut(a, key); + if (gpr.found_existing) { + a.free(key); + try gpr.value_ptr.*.append(a, val); + } else { + gpr.value_ptr.* = .{}; + try gpr.value_ptr.append(a, val); } + al.clearRetainingCapacity(); if (b2 == '\n') { - break :headers; + break :headers_loop; } else { try al.append(b2); } @@ -375,65 +425,25 @@ const Commit = struct { // And the message is everything else try reader.readAllArrayList(&al, 1_000_000); - message = try al.toOwnedSlice(); + const message = try al.toOwnedSlice(); + errdefer a.free(message); return .{ - .tree = tree orelse { - std.log.err("no tree", .{}); - return error.InvalidCommit; - }, - .parents = parents, - .author = author orelse { - std.log.err("no author", .{}); - return error.InvalidCommit; - }, - .committer = committer, - .gpgsig = gpgsig, - .message = message orelse { - std.log.err("no message", .{}); - return error.InvalidCommit; - }, + .headers = headers, + .message = message, }; } - pub fn deinit(self: *Commit, a: std.mem.Allocator) void { - a.free(self.tree); - for (self.parents.items) |parent| a.free(parent); - self.parents.deinit(); - a.free(self.author); - if (self.committer) |committer| a.free(committer); - if (self.gpgsig) |gpgsig| a.free(gpgsig); + pub fn deinit(self: *Kvlm, a: std.mem.Allocator) void { + var it = self.headers.iterator(); + while 
(it.next()) |i| { + a.free(i.key_ptr.*); + for (i.value_ptr.*.items) |ii| { + a.free(ii); + } + i.value_ptr.*.deinit(a); + } + self.headers.deinit(a); a.free(self.message); } }; - -test "parse commit" { - const commit_str = - \\tree 29ff16c9c14e2652b22f8b78bb08a5a07930c147 - \\parent 206941306e8a8af65b66eaaaea388a7ae24d49a0 - \\author Thibault Polge <thibault@thb.lt> 1527025023 +0200 - \\committer Thibault Polge <thibault@thb.lt> 1527025044 +0200 - \\gpgsig -----BEGIN PGP SIGNATURE----- - \\ - \\ iQIzBAABCAAdFiEExwXquOM8bWb4Q2zVGxM2FxoLkGQFAlsEjZQACgkQGxM2FxoL - \\ kGQdcBAAqPP+ln4nGDd2gETXjvOpOxLzIMEw4A9gU6CzWzm+oB8mEIKyaH0UFIPh - \\ rNUZ1j7/ZGFNeBDtT55LPdPIQw4KKlcf6kC8MPWP3qSu3xHqx12C5zyai2duFZUU - \\ wqOt9iCFCscFQYqKs3xsHI+ncQb+PGjVZA8+jPw7nrPIkeSXQV2aZb1E68wa2YIL - \\ 3eYgTUKz34cB6tAq9YwHnZpyPx8UJCZGkshpJmgtZ3mCbtQaO17LoihnqPn4UOMr - \\ V75R/7FjSuPLS8NaZF4wfi52btXMSxO/u7GuoJkzJscP3p4qtwe6Rl9dc1XC8P7k - \\ NIbGZ5Yg5cEPcfmhgXFOhQZkD0yxcJqBUcoFpnp2vu5XJl2E5I/quIyVxUXi6O6c - \\ /obspcvace4wy8uO0bdVhc4nJ+Rla4InVSJaUaBeiHTW8kReSFYyMmDCzLjGIu1q - \\ doU61OM3Zv1ptsLu3gUE6GU27iWYj2RWN3e3HE4Sbd89IFwLXNdSuM0ifDLZk7AQ - \\ WBhRhipCCgZhkj9g2NEk7jRVslti1NdN5zoQLaJNqSwO1MtxTmJ15Ksk3QP6kfLB - \\ Q52UWybBzpaP9HEd4XnR+HuQ4k2K0ns2KgNImsNvIyFwbpMUyUWLMPimaV1DWUXo - \\ 5SBjDB/V/W2JBFR+XKHFJeFwYhj7DD/ocsGr4ZMx/lgc8rjIBkI= - \\ =lgTX - \\ -----END PGP SIGNATURE----- - \\ - \\Create first draft - ; - var fbs = std.io.fixedBufferStream(commit_str); - const rdr = fbs.reader(); - var commit = try Commit.parse(std.testing.allocator, rdr); - defer commit.deinit(std.testing.allocator); -}