commit 25f18c4261eac8834bb96809c5e3b9f17b5d536d
Author: Martin Ashby <martin@ashbysoft.com>
Date: Tue, 30 Jan 2024 11:39:05 +0000
Initial
Diffstat:
A | .gitignore | | | 2 | ++ |
A | README.md | | | 5 | +++++ |
A | build.zig | | | 91 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | build.zig.zon | | | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/main.zig | | | 155 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/root.zig | | | 10 | ++++++++++ |
6 files changed, 325 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+zig-out
+zig-cache/
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+# z1brc
+
+Zig version of [One Billion Row Challenge](https://github.com/gunnarmorling/1brc)
+
+This is my naïve solution.
diff --git a/build.zig b/build.zig
@@ -0,0 +1,91 @@
+const std = @import("std");
+
+/// Describes the build graph for this package. Nothing is compiled here: the
+/// function only declares steps, which the external `zig build` runner then
+/// executes. It registers a static library and an executable (both named
+/// "z1brc"), a `run` step and a `test` step.
+pub fn build(b: *std.Build) void {
+    // Target and optimisation mode are taken from the `zig build` command line.
+    // No defaults are overridden here, so the user is free to pick any target
+    // and any of Debug, ReleaseSafe, ReleaseFast or ReleaseSmall.
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Static library rooted at src/root.zig, installed by the default
+    // "install" step.
+    const static_lib = b.addStaticLibrary(.{
+        .name = "z1brc",
+        .root_source_file = .{ .path = "src/root.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(static_lib);
+
+    // Command-line executable rooted at src/main.zig, also installed by the
+    // default "install" step.
+    const app = b.addExecutable(.{
+        .name = "z1brc",
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(app);
+
+    // `zig build run -- arg1 arg2` runs the executable. The run command depends
+    // on the install step so that it executes from the installation directory
+    // instead of the cache directory.
+    const run_app = b.addRunArtifact(app);
+    run_app.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded_args| run_app.addArgs(forwarded_args);
+
+    const run_top_level = b.step("run", "Run the app");
+    run_top_level.dependOn(&run_app.step);
+
+    // `zig build test` builds and runs the unit tests of both root files.
+    const lib_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/root.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    const run_lib_tests = b.addRunArtifact(lib_tests);
+
+    const app_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    const run_app_tests = b.addRunArtifact(app_tests);
+
+    const test_top_level = b.step("test", "Run unit tests");
+    test_top_level.dependOn(&run_lib_tests.step);
+    test_top_level.dependOn(&run_app_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
@@ -0,0 +1,62 @@
+.{
+ .name = "z1brc",
+ // This is a [Semantic Version](https://semver.org/).
+ // In a future version of Zig it will be used for package deduplication.
+ .version = "0.0.0",
+
+ // This field is optional.
+ // This is currently advisory only; Zig does not yet do anything
+ // with this value.
+ //.minimum_zig_version = "0.11.0",
+
+ // This field is optional.
+ // Each dependency must either provide a `url` and `hash`, or a `path`.
+ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+ // Once all dependencies are fetched, `zig build` no longer requires
+ // internet connectivity.
+ .dependencies = .{
+ // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+ //.example = .{
+ // // When updating this field to a new URL, be sure to delete the corresponding
+ // // `hash`, otherwise you are communicating that you expect to find the old hash at
+ // // the new URL.
+ // .url = "https://example.com/foo.tar.gz",
+ //
+ // // This is computed from the file contents of the directory of files that is
+ // // obtained after fetching `url` and applying the inclusion rules given by
+ // // `paths`.
+ // //
+ // // This field is the source of truth; packages do not come from a `url`; they
+ // // come from a `hash`. `url` is just one of many possible mirrors for how to
+ // // obtain a package matching this `hash`.
+ // //
+ // // Uses the [multihash](https://multiformats.io/multihash/) format.
+ // .hash = "...",
+ //
+ // // When this is provided, the package is found in a directory relative to the
+ // // build root. In this case the package's hash is irrelevant and therefore not
+ // // computed. This field and `url` are mutually exclusive.
+ // .path = "foo",
+ //},
+ },
+
+ // Specifies the set of files and directories that are included in this package.
+ // Only files and directories listed here are included in the `hash` that
+ // is computed for this package.
+ // Paths are relative to the build root. Use the empty string (`""`) to refer to
+ // the build root itself.
+ // A directory listed here means that all files within, recursively, are included.
+ .paths = .{
+ // This makes *all* files, recursively, included in this package. It is generally
+ // better to explicitly list the files and directories instead, to ensure that
+ // fetching from tarballs, file system paths, and version control all result
+ // in the same contents hash.
+ "",
+ // For example...
+ //"build.zig",
+ //"build.zig.zon",
+ //"src",
+ //"LICENSE",
+ //"README.md",
+ },
+}
diff --git a/src/main.zig b/src/main.zig
@@ -0,0 +1,155 @@
+const std = @import("std");
+
+/// Program entry point.
+///
+/// Memory-maps the file named by the first command-line argument, aggregates it
+/// with `run`, and writes the resulting summary to stdout. Start and finish
+/// messages (the latter with the elapsed whole seconds) go to the log.
+/// Returns `error.NoInputFile` when no file argument is given.
+pub fn main() !void {
+    var timer = try std.time.Timer.start();
+    std.log.err("start!", .{});
+
+    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa_state.deinit();
+    const allocator = gpa_state.allocator();
+
+    var argv = std.process.args();
+    defer argv.deinit();
+    // The first argument is the program name; only what follows is ours.
+    const has_program_name = argv.skip();
+    if (!has_program_name) @panic("program name wasn't supplied wtf");
+    const input_path = argv.next() orelse return error.NoInputFile;
+
+    const mapped = try open_mmap(std.fs.cwd(), input_path);
+    defer std.os.munmap(mapped);
+
+    const report = try run(allocator, mapped, &timer);
+    defer allocator.free(report);
+    try std.io.getStdOut().writeAll(report);
+
+    const elapsed_s = timer.read() / std.time.ns_per_s;
+    std.log.err("finished at {} s", .{elapsed_s});
+}
+
+/// Offset added to a reading (in tenths of a degree) so that the supported
+/// range -99.9..99.9 is stored as the unsigned range 0..1998.
+const temperature_bias: u16 = 999;
+
+/// Aggregates `station;temperature` records and renders the per-station summary.
+///
+/// `infile` holds newline-separated records of the form `name;value`, where
+/// `value` is a decimal with one fractional digit in the range -99.9..99.9.
+/// The result has the form `{name=min/max/mean, ...}`: stations are ordered by
+/// byte value and every entry, including the last, is followed by ", ".
+/// The mean is rounded to the nearest tenth, with ties going toward +infinity.
+///
+/// `t` is only read, to timestamp the progress messages written to stderr.
+/// The caller owns the returned slice and must free it with `a`.
+/// Returns `error.Malformatted` for a record without `;` or for a reading
+/// outside the supported range.
+fn run(a: std.mem.Allocator, infile: []const u8, t: *std.time.Timer) ![]const u8 {
+    // Progress goes through std.debug.print rather than std.log.err: it is
+    // still always written to stderr, but it is no longer counted as a logged
+    // error, which the test runner reports as a failure.
+    std.debug.print("mmap done, iterating!\n", .{});
+
+    // Station names are slices of `infile`, which outlives this map, so the
+    // keys need neither copying nor freeing.
+    var res = std.StringArrayHashMap(Accumulator).init(a);
+    defer res.deinit();
+
+    var lines = std.mem.tokenizeScalar(u8, infile, '\n');
+    var ct: usize = 0;
+    while (lines.next()) |line| {
+        ct += 1;
+        if (ct % 100000 == 0) {
+            std.debug.print("processed {} lines at {} seconds\n", .{ ct, t.read() / std.time.ns_per_s });
+        }
+        var spl = std.mem.splitScalar(u8, line, ';');
+        const key = spl.first();
+        const val_s = spl.next() orelse return error.Malformatted;
+        const val = try parse_biased_tenths(val_s);
+
+        const gpr = try res.getOrPut(key);
+        if (gpr.found_existing) {
+            const e = gpr.value_ptr;
+            e.min = @min(e.min, val);
+            e.max = @max(e.max, val);
+            e.sum += val;
+            e.count += 1;
+        } else {
+            gpr.value_ptr.* = .{ .min = val, .max = val, .sum = val, .count = 1 };
+        }
+    }
+
+    // Emit the stations sorted by the byte values of their names.
+    const Srt = struct {
+        keys: [][]const u8,
+        pub fn lessThan(self: @This(), a_index: usize, b_index: usize) bool {
+            return std.mem.order(u8, self.keys[a_index], self.keys[b_index]).compare(.lt);
+        }
+    };
+    res.sort(Srt{ .keys = res.keys() });
+
+    var rr = std.ArrayList(u8).init(a);
+    defer rr.deinit();
+    const ww = rr.writer();
+    try ww.writeAll("{");
+    for (res.keys(), res.values()) |k, v| {
+        try ww.writeAll(k);
+        try ww.writeAll("=");
+        try write_tenths(ww, @as(i32, v.min) - temperature_bias);
+        try ww.writeAll("/");
+        try write_tenths(ww, @as(i32, v.max) - temperature_bias);
+        try ww.writeAll("/");
+        // floor(sum / count + 1/2), computed in integers. The bias is a whole
+        // number, so rounding the biased mean rounds the true mean identically.
+        const mean_biased = (2 * v.sum + v.count) / (2 * v.count);
+        try write_tenths(ww, @as(i32, @intCast(mean_biased)) - temperature_bias);
+        try ww.writeAll(", ");
+    }
+    try ww.writeAll("}");
+    return try rr.toOwnedSlice();
+}
+
+/// Parses a reading such as "-12.3" into biased tenths of a degree.
+///
+/// Digits are accumulated, a '-' anywhere marks the value as negative, and every
+/// other byte (the decimal point included) is skipped. Returns
+/// `error.Malformatted` as soon as the magnitude exceeds 99.9, which also keeps
+/// the arithmetic within `u16`.
+fn parse_biased_tenths(val_s: []const u8) !u16 {
+    var magnitude: u16 = 0;
+    var is_neg = false;
+    for (val_s) |c| {
+        if (c == '-') {
+            is_neg = true;
+        } else if (c >= '0' and c <= '9') {
+            // magnitude <= 999 here, so the new value is at most 9999.
+            magnitude = magnitude * 10 + (c - '0');
+            if (magnitude > temperature_bias) return error.Malformatted;
+        }
+    }
+    return if (is_neg) temperature_bias - magnitude else temperature_bias + magnitude;
+}
+
+/// Writes `tenths` (a temperature in tenths of a degree) as a decimal with one
+/// fractional digit, e.g. -5 becomes "-0.5" and 120 becomes "12.0".
+fn write_tenths(w: anytype, tenths: i32) !void {
+    // Sign and magnitude are formatted separately: floor division would turn
+    // -5 into "-1.5".
+    const sign: []const u8 = if (tenths < 0) "-" else "";
+    const magnitude: u32 = @intCast(if (tenths < 0) -tenths else tenths);
+    try std.fmt.format(w, "{s}{d}.{d}", .{ sign, magnitude / 10, magnitude % 10 });
+}
+
+/// Running statistics for one station, filled in by `run`.
+/// `min`, `max` and `sum` hold the integer-encoded readings that `run` computes
+/// (tenths of a degree, shifted so that they fit an unsigned type).
+const Accumulator = struct {
+ /// Smallest encoded reading seen so far.
+ min: u16,
+ /// Largest encoded reading seen so far.
+ max: u16,
+ /// Sum of all encoded readings; divided by `count` to obtain the mean.
+ sum: u64,
+ /// Number of readings folded into this accumulator.
+ count: u64,
+};
+
+/// Maps the whole of `file_path` (resolved relative to `dir`) read-only into
+/// memory.
+///
+/// The file handle is closed before returning. The caller owns the mapping and
+/// must release it with `std.os.munmap`.
+/// Returns `error.EmptyFile` for a zero-length file: a zero-length mapping is
+/// invalid, and std.os.mmap treats the resulting EINVAL as unreachable.
+/// Returns `error.FileTooBig` when the file size does not fit in `usize`.
+fn open_mmap(dir: std.fs.Dir, file_path: []const u8) ![]align(std.mem.page_size) u8 {
+    const f = try dir.openFile(file_path, .{ .mode = .read_only });
+    defer f.close();
+    const stat = try f.stat();
+    if (stat.size == 0) return error.EmptyFile;
+    const len = std.math.cast(usize, stat.size) orelse return error.FileTooBig;
+    return try std.os.mmap(null, len, std.os.PROT.READ, std.os.MAP.PRIVATE, f.handle, 0);
+}
+
+// Fixture: ten `station;temperature` records, one per line. Istanbul appears
+// twice, so it is the only station whose min, max and mean differ.
+const test_input =
+\\Hamburg;12.0
+\\Bulawayo;8.9
+\\Palembang;38.8
+\\St. John's;15.2
+\\Cracow;12.6
+\\Bridgetown;26.9
+\\Istanbul;6.2
+\\Roseau;34.4
+\\Conakry;31.2
+\\Istanbul;23.0
+;
+// Summary the test expects `run` to produce for `test_input`: stations in
+// byte order, each written as min/max/mean and followed by ", ".
+const test_output =
+\\{Bridgetown=26.9/26.9/26.9, Bulawayo=8.9/8.9/8.9, Conakry=31.2/31.2/31.2, Cracow=12.6/12.6/12.6, Hamburg=12.0/12.0/12.0, Istanbul=6.2/23.0/14.6, Palembang=38.8/38.8/38.8, Roseau=34.4/34.4/34.4, St. John's=15.2/15.2/15.2, }
+;
+// Runs `run` over the sample records and compares the rendered summary with
+// the expected text.
+test {
+    const a = std.testing.allocator;
+    // `run` takes a timer as its third argument (used for progress messages);
+    // the call did not compile without one.
+    var t = try std.time.Timer.start();
+    const out = try run(a, test_input, &t);
+    defer a.free(out);
+    try std.testing.expectEqualStrings(test_output, out);
+}
diff --git a/src/root.zig b/src/root.zig
@@ -0,0 +1,10 @@
+const std = @import("std");
+const testing = std.testing;
+
+/// Returns the sum of `a` and `b`. Declared `export`, so the symbol is part of
+/// the library's exported interface.
+export fn add(a: i32, b: i32) i32 {
+    const sum = a + b;
+    return sum;
+}
+
+// Checks `add` on one pair of small positive operands.
+test "basic add functionality" {
+    const got = add(3, 7);
+    try testing.expectEqual(@as(i32, 10), got);
+}