Initial - z1brc - Unnamed repository; edit this file 'description' to name the repository.

commit 25f18c4261eac8834bb96809c5e3b9f17b5d536d
Author: Martin Ashby <martin@ashbysoft.com>
Date:   Tue, 30 Jan 2024 11:39:05 +0000

Initial

Diffstat:
A .gitignore  | 2 ++
A README.md  | 5 +++++
A build.zig  | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A build.zig.zon  | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/main.zig  | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/root.zig  | 10 ++++++++++

6 files changed, 325 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+zig-out
+zig-cache/
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+# z1brc
+
+Zig version of [One Billion Row Challenge](https://github.com/gunnarmorling/1brc)
+
+This is my naïve solution.
diff --git a/build.zig b/build.zig
@@ -0,0 +1,91 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+/// Describes the build graph for the `z1brc` static library, the `z1brc`
+/// executable, and the `run` and `test` steps exposed through `zig build`.
+pub fn build(b: *std.Build) void {
+    // Target and optimization mode are taken from the `zig build` command
+    // line; no defaults are overridden here.
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Static library rooted at src/root.zig, installed by the install step.
+    const static_lib = b.addStaticLibrary(.{
+        .name = "z1brc",
+        .root_source_file = .{ .path = "src/root.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(static_lib);
+
+    // Executable rooted at src/main.zig, installed alongside the library.
+    const app = b.addExecutable(.{
+        .name = "z1brc",
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(app);
+
+    // `zig build run -- arg1 arg2` runs the executable after the install
+    // step and forwards any extra command-line arguments to it.
+    const run_app = b.addRunArtifact(app);
+    run_app.step.dependOn(b.getInstallStep());
+    if (b.args) |extra_args| run_app.addArgs(extra_args);
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_app.step);
+
+    // Unit tests for the library root and for the executable root.
+    const lib_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/root.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    const run_lib_tests = b.addRunArtifact(lib_tests);
+
+    const exe_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    const run_exe_tests = b.addRunArtifact(exe_tests);
+
+    // `zig build test` runs both test binaries.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_lib_tests.step);
+    test_step.dependOn(&run_exe_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
@@ -0,0 +1,62 @@
+.{
+    .name = "z1brc",
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // This field is optional.
+    // This is currently advisory only; Zig does not yet do anything
+    // with this value.
+    //.minimum_zig_version = "0.11.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package.
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        // This makes *all* files, recursively, included in this package. It is generally
+        // better to explicitly list the files and directories instead, to ensure that
+        // fetching from tarballs, file system paths, and version control all result
+        // in the same contents hash.
+        "",
+        // For example...
+        //"build.zig",
+        //"build.zig.zon",
+        //"src",
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/main.zig b/src/main.zig
@@ -0,0 +1,155 @@
+const std = @import("std");
+
+/// Entry point: memory-maps the file named by the first command-line
+/// argument, aggregates it with `run`, and writes the result to stdout.
+/// Progress and timing messages go through `std.log.err`.
+pub fn main() !void {
+    var timer = try std.time.Timer.start();
+    std.log.err("start!", .{});
+
+    var debug_allocator = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = debug_allocator.deinit();
+    const allocator = debug_allocator.allocator();
+
+    var argv = std.process.args();
+    defer argv.deinit();
+    // The first argument is the program name; the second is the input path.
+    const has_program_name = argv.skip();
+    if (!has_program_name) @panic("program name wasn't supplied wtf");
+    const input_path = argv.next() orelse {
+        return error.NoInputFile;
+    };
+
+    const mapped = try open_mmap(std.fs.cwd(), input_path);
+    defer std.os.munmap(mapped);
+
+    const report = try run(allocator, mapped, &timer);
+    defer allocator.free(report);
+    const stdout = std.io.getStdOut();
+    try stdout.writeAll(report);
+
+    const elapsed_s = timer.read() / std.time.ns_per_s;
+    std.log.err("finished at {} s", .{elapsed_s});
+}
+
+/// Offset added to a reading (in tenths of a degree) so that the range
+/// -99.9..99.9 is stored as the non-negative range 0..1998.
+const reading_offset = 999;
+
+/// Aggregates `station;reading` lines from `infile` into per-station
+/// min/max/mean and renders them as one line of text.
+///
+/// `infile` is split on '\n' (empty lines are skipped by the tokenizer); each
+/// line must contain a ';' separating the station name from its reading.
+/// `t` is only read to timestamp the progress log messages.
+///
+/// Returns a buffer allocated with `a`; the caller frees it with `a.free`.
+/// Fails with `error.Malformatted` when a line has no ';' or a reading lies
+/// outside -99.9..99.9, and propagates allocation failures.
+fn run(a: std.mem.Allocator, infile: []const u8, t: *std.time.Timer) ![]const u8 {
+    std.log.err("mmap done, iterating!", .{});
+
+    // Keys are heap copies owned by the map and released together with it.
+    var res = std.StringArrayHashMap(Accumulator).init(a);
+    defer {
+        for (res.keys()) |k| a.free(k);
+        res.deinit();
+    }
+
+    var lines = std.mem.tokenizeScalar(u8, infile, '\n');
+    var ct: usize = 0;
+    while (lines.next()) |line| {
+        ct += 1;
+        if (ct % 100000 == 0) {
+            std.log.err("processed {} lines at {} seconds", .{ ct, t.read() / std.time.ns_per_s });
+        }
+        var spl = std.mem.splitScalar(u8, line, ';');
+        const key = spl.first();
+        const val_s = spl.next() orelse return error.Malformatted;
+        const val = try parse_reading(val_s);
+
+        // Look up with the borrowed slice and copy the name only the first
+        // time a station is seen, instead of allocating for every line.
+        const gpr = try res.getOrPut(key);
+        if (gpr.found_existing) {
+            const acc = gpr.value_ptr;
+            acc.min = @min(acc.min, val);
+            acc.max = @max(acc.max, val);
+            acc.sum += val;
+            acc.count += 1;
+        } else {
+            const owned = a.dupe(u8, key) catch |err| {
+                // Drop the half-inserted entry so the cleanup above never
+                // tries to free a slice that points into `infile`.
+                _ = res.swapRemove(key);
+                return err;
+            };
+            gpr.key_ptr.* = owned;
+            gpr.value_ptr.* = .{
+                .min = val,
+                .max = val,
+                .sum = val,
+                .count = 1,
+            };
+        }
+    }
+
+    // Go through the keys sorted.
+    // Readings are kept as integers (tenths of a degree) rather than floats:
+    // -999 -> 999 maps to the positive-only range 0 -> 1998, which fits a u16.
+    const Srt = struct {
+        keys: [][]const u8,
+        pub fn lessThan(self: @This(), a_index: usize, b_index: usize) bool {
+            // character value order!
+            return std.mem.order(u8, self.keys[a_index], self.keys[b_index]).compare(.lt);
+        }
+    };
+    res.sort(Srt{ .keys = res.keys() });
+
+    var rr = std.ArrayList(u8).init(a);
+    defer rr.deinit();
+    const ww = rr.writer();
+    try ww.writeAll("{");
+    var it = res.iterator();
+    while (it.next()) |nxt| {
+        const v = nxt.value_ptr.*;
+        try ww.writeAll(nxt.key_ptr.*);
+        try ww.writeAll("=");
+        try write_reading(ww, @as(i32, v.min) - reading_offset);
+        try ww.writeAll("/");
+        try write_reading(ww, @as(i32, v.max) - reading_offset);
+        try ww.writeAll("/");
+        // Integer division of the offset sum: the mean is rounded down to
+        // the nearest tenth.
+        const mean = v.sum / v.count;
+        try write_reading(ww, @as(i32, @intCast(mean)) - reading_offset);
+        try ww.writeAll(", ");
+    }
+    try ww.writeAll("}");
+    return try rr.toOwnedSlice();
+}
+
+/// Parses a reading such as "-12.3" into offset tenths (0..1998).
+///
+/// Every ASCII digit is accumulated as-is, so the last digit is taken as the
+/// tenths place; a '-' marks the value negative and any other byte (such as
+/// the '.') is ignored. Returns `error.Malformatted` when the magnitude
+/// exceeds 99.9.
+fn parse_reading(text: []const u8) error{Malformatted}!u16 {
+    var tenths: u16 = 0;
+    var is_neg = false;
+    for (text) |c| {
+        if (c == '-') {
+            is_neg = true;
+        } else if (c >= '0' and c <= '9') {
+            // `tenths` is at most 999 here, so this cannot overflow a u16.
+            tenths = tenths * 10 + (c - '0');
+            if (tenths > reading_offset) return error.Malformatted;
+        }
+    }
+    return if (is_neg) reading_offset - tenths else reading_offset + tenths;
+}
+
+/// Writes a signed number of tenths as `[-]I.F`, e.g. -15 becomes "-1.5".
+/// The sign is written separately so that values between -0.9 and -0.1 keep
+/// their minus sign and the fractional digit is not flipped.
+fn write_reading(writer: anytype, tenths: i32) !void {
+    const negative = tenths < 0;
+    const magnitude: u32 = @intCast(if (negative) -tenths else tenths);
+    if (negative) try writer.writeAll("-");
+    try writer.print("{}.{}", .{ magnitude / 10, magnitude % 10 });
+}
+
+/// Running per-station statistics kept by `run`.
+/// `run` subtracts 999 from `min`, `max` and `sum / count` before printing,
+/// i.e. readings are held as integer tenths with an offset of 999.
+const Accumulator = struct {
+    /// Smallest stored reading seen so far.
+    min: u16,
+    /// Largest stored reading seen so far.
+    max: u16,
+    /// Sum of all stored readings for this station.
+    sum: u64,
+    /// Number of readings added to `sum`.
+    count: u64,
+};
+
+/// Maps the whole of `file_path` (resolved relative to `dir`) into memory
+/// as a private, read-only mapping.
+///
+/// The file handle is closed before returning. The returned mapping must be
+/// released with `std.os.munmap`.
+///
+/// Returns `error.EmptyInputFile` for a zero-length file (a zero-length
+/// mapping cannot be created) and `error.FileTooBig` when the file size does
+/// not fit in `usize`; open, stat and mmap failures are propagated.
+fn open_mmap(dir: std.fs.Dir, file_path: []const u8) ![]align(std.mem.page_size) u8 {
+    const f = try dir.openFile(file_path, .{ .mode = .read_only });
+    defer f.close();
+    const stat = try f.stat();
+    if (stat.size == 0) return error.EmptyInputFile;
+    const len = std.math.cast(usize, stat.size) orelse return error.FileTooBig;
+    return try std.os.mmap(null, len, std.os.PROT.READ, std.os.MAP.PRIVATE, f.handle, 0);
+}
+
+// Sample input for the test below: one `station;reading` pair per line.
+// Istanbul appears twice, so it exercises the min/max/mean aggregation.
+const test_input = 
+\\Hamburg;12.0
+\\Bulawayo;8.9
+\\Palembang;38.8
+\\St. John's;15.2
+\\Cracow;12.6
+\\Bridgetown;26.9
+\\Istanbul;6.2
+\\Roseau;34.4
+\\Conakry;31.2
+\\Istanbul;23.0
+;
+// Output the test below expects from `run` for `test_input`: stations in
+// sorted order, each as `name=min/max/mean`, with ", " after every entry
+// (including the last one).
+const test_output = 
+\\{Bridgetown=26.9/26.9/26.9, Bulawayo=8.9/8.9/8.9, Conakry=31.2/31.2/31.2, Cracow=12.6/12.6/12.6, Hamburg=12.0/12.0/12.0, Istanbul=6.2/23.0/14.6, Palembang=38.8/38.8/38.8, Roseau=34.4/34.4/34.4, St. John's=15.2/15.2/15.2, }
+;
+// Checks `run` end to end against the sample input.
+test "run aggregates the sample input" {
+    const a = std.testing.allocator;
+    // `run` takes a timer for its progress logging; the test has to supply
+    // one or the call does not compile.
+    var t = try std.time.Timer.start();
+    const out = try run(a, test_input, &t);
+    defer a.free(out);
+    try std.testing.expectEqualStrings(test_output, out);
+}
diff --git a/src/root.zig b/src/root.zig
@@ -0,0 +1,10 @@
+const std = @import("std");
+const testing = std.testing;
+
+/// Returns the sum of `a` and `b`. Exported as a symbol of the library.
+export fn add(a: i32, b: i32) i32 {
+    const sum = a + b;
+    return sum;
+}
+
+// Sanity check for `add` with a small positive pair.
+test "basic add functionality" {
+    const sum = add(3, 7);
+    try testing.expect(sum == 10);
+}

	z1brc Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README

A	.gitignore	\|	2	++
A	README.md	\|	5	+++++
A	build.zig	\|	91	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	build.zig.zon	\|	62	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/main.zig	\|	155	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/root.zig	\|	10	++++++++++