//! Bencoding //! See specification here https://wiki.theory.org/BitTorrentSpecification#Bencoding const std = @import("std"); const AnyWriter = @import("anywriter.zig"); pub const Error = error.Malformatted || std.io.AnyReader.Error; // All content is owned by the BValue and must be freed with deinit. // hmmm, this gets a bit awkward from the _writing_ side of things. // What should I do? Optionally owned? Yeah let's do that. pub const BValue = union(enum) { int: i64, string: struct { string: []const u8, owned: bool = false, }, list: struct { list: std.ArrayList(BValue), valuesOwned: bool = false, }, dict: struct { dict: std.StringArrayHashMap(BValue), keysAndValuesOwned: bool = false, }, pub fn bencode(self: *BValue, base_writer: anytype) !void { var wrap = AnyWriter.wrapper(base_writer); var writer = wrap.any(); try self.bencodeInner(writer); } // Note: uses defined types only to avoid trying to recursively evaulate this function // at compile time, otherwise we run into https://github.com/ziglang/zig/issues/13724 fn bencodeInner(self: *BValue, writer: AnyWriter) !void { switch (self.*) { .int => |i| { try std.fmt.format(writer, "i{}e", .{i}); }, .string => |s| { try std.fmt.format(writer, "{}:{s}", .{ s.string.len, s.string }); }, .list => |*l| { try writer.writeByte('l'); for (l.list.items) |*i| { try i.bencodeInner(writer); } try writer.writeByte('e'); }, .dict => |*d| { // Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics). The strings should be compared using a binary comparison, not a culture-specific "natural" comparison. const Ctx = struct { keys: [][]const u8, pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { const a_k = ctx.keys[a_index]; const b_k = ctx.keys[b_index]; return std.mem.order(u8, a_k, b_k) == .lt; } }; var dict: *std.StringArrayHashMap(BValue) = &d.dict; dict.sort(Ctx{ .keys = dict.keys() }); try writer.writeByte('d'); var it = dict.iterator(); while (it.next()) |entry| { try std.fmt.format(writer, "{}:{s}", .{ entry.key_ptr.*.len, entry.key_ptr.* }); try entry.value_ptr.*.bencodeInner(writer); } try writer.writeByte('e'); }, } } pub fn deinit(self: *BValue, a: std.mem.Allocator) void { switch (self.*) { .int => {}, .string => |s| { if (s.owned) a.free(s.string); }, .list => |*l| { if (l.valuesOwned) for (l.list.items) |*i| i.deinit(a); l.list.deinit(); }, .dict => |*d| { if (d.keysAndValuesOwned) { var it = d.dict.iterator(); while (it.next()) |entry| { a.free(entry.key_ptr.*); entry.value_ptr.*.deinit(a); } } d.dict.deinit(); }, } } pub fn asInt(self: BValue, comptime itype: type) !itype { switch (self) { .int => |i| { return std.math.cast(itype, i) orelse error.Overflow; }, else => return error.WrongType, } } pub fn asString(self: BValue) ![]const u8 { switch (self) { .string => |s| return s.string, else => return error.WrongType, } } pub fn asList(self: BValue) !std.ArrayList(BValue) { switch (self) { .list => |l| return l.list, else => return error.WrongType, } } pub fn asDict(self: BValue) !std.StringArrayHashMap(BValue) { switch (self) { .dict => |d| return d.dict, else => return error.WrongType, } } }; pub fn bdecodeBuf(a: std.mem.Allocator, buf: []const u8) !BValue { var fbs = std.io.fixedBufferStream(buf); return try bdecode(a, fbs.reader()); } pub fn bdecode(a: std.mem.Allocator, base_reader: anytype) anyerror!BValue { var reader = PeekStream.init(base_reader.any()); return bdecodeInner(a, &reader, 0); } const PeekStream = std.io.PeekStream(.{ .Static = 1 }, std.io.AnyReader); // Note: uses defined types only to avoid trying to recursively evaulate this function // at compile time, otherwise we run into https://github.com/ziglang/zig/issues/13724 fn bdecodeInner(a: std.mem.Allocator, peekStream: *PeekStream, depth: u32) !BValue { if (depth > 100) { // TODO diagnostic... return error.Malformatted; } var reader = peekStream.reader(); var byte = try reader.readByte(); if (std.ascii.isDigit(byte)) { try peekStream.putBackByte(byte); return .{ .string = .{ .owned = true, .string = try readString(a, peekStream), } }; } else { switch (byte) { 'i' => { const max_len = comptime std.fmt.comptimePrint("{}", .{std.math.minInt(i64)}).len; var s = reader.readUntilDelimiterAlloc(a, 'e', max_len) catch return error.Malformatted; defer a.free(s); const i = std.fmt.parseInt(i64, s, 10) catch return error.Malformatted; return .{ .int = i }; }, 'l' => { var r: BValue = .{ .list = .{ .valuesOwned = true, .list = std.ArrayList(BValue).init(a), } }; errdefer r.deinit(a); while (true) { const b2 = try reader.readByte(); if (b2 == 'e') break; try peekStream.putBackByte(b2); var val = try bdecodeInner(a, peekStream, depth + 1); errdefer val.deinit(a); try r.list.list.append(val); } return r; }, 'd' => { var r: BValue = .{ .dict = .{ .keysAndValuesOwned = true, .dict = std.StringArrayHashMap(BValue).init(a) } }; errdefer r.deinit(a); while (true) { const b2 = try reader.readByte(); if (b2 == 'e') break; try peekStream.putBackByte(b2); var key = try readString(a, peekStream); errdefer a.free(key); var val = try bdecode(a, reader); errdefer val.deinit(a); try r.dict.dict.put(key, val); } return r; }, else => return error.Malformatted, // TODO diagnostics } } } // Result is owned by the caller and must be freed fn readString(a: std.mem.Allocator, peekStream: *PeekStream) ![]const u8 { var reader = peekStream.reader(); const max_len = comptime std.fmt.comptimePrint("{}", .{std.math.maxInt(usize)}).len; const str_len_s = reader.readUntilDelimiterAlloc(a, ':', max_len) catch { return error.Malformatted; }; defer a.free(str_len_s); var strlen = std.fmt.parseInt(usize, str_len_s, 10) catch return error.Malformatted; var string = try a.alloc(u8, strlen); errdefer a.free(string); reader.readNoEof(string) catch return error.Malformatted; return string; } test "bdecode empty" { var a = std.testing.allocator; try std.testing.expectError(error.EndOfStream, bdecodeBuf(a, "")); } test "bdecode too short" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "1")); } test "bdecode plain number" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "12")); } test "bdecode garbage" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "xz1234")); } test "bdecode number" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "i123e"); defer bval.deinit(a); try std.testing.expectEqualDeep(BValue{ .int = 123 }, bval); } test "bdecode number negative" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "i-123e"); defer bval.deinit(a); try std.testing.expectEqualDeep(BValue{ .int = -123 }, bval); } test "bdecode number empty" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "ie")); } test "bdecode number just sign" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "i-e")); } test "bdecode number no end" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "i123123671283")); } test "bdecode number out of range" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "i9223372036854775808e")); } test "bdecode string" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "5:hello"); defer bval.deinit(a); try std.testing.expectEqualDeep(BValue{ .string = .{ .owned = true, .string = "hello" } }, bval); } test "bdecode string too short" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "5:hell")); } test "bdecode list" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "l5:hello5:worlde"); defer bval.deinit(a); try std.testing.expectEqual(@as(usize, 2), bval.list.list.items.len); try std.testing.expectEqualStrings("hello", bval.list.list.items[0].string.string); try std.testing.expectEqualStrings("world", bval.list.list.items[1].string.string); } test "invalid list" { var a = std.testing.allocator; try std.testing.expectError(error.EndOfStream, bdecodeBuf(a, "l5:hello5:world")); // missing end } test "dict" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "d5:hello5:worlde"); defer bval.deinit(a); var v = bval.dict.dict.getPtr("hello") orelse return error.TestExpectedNotNull; try std.testing.expectEqualStrings("world", v.string.string); } test "invalid dict no value" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "d5:hello5:world2:hie")); } test "invalid dict wrong key type" { var a = std.testing.allocator; try std.testing.expectError(error.Malformatted, bdecodeBuf(a, "di32e5:helloe")); } test "nested structure" { var a = std.testing.allocator; var bval = try bdecodeBuf(a, "d5:hello5:world2:hili123ei456el4:nesteee"); defer bval.deinit(a); var v = bval.dict.dict.getPtr("hello") orelse return error.TestExpectedNotNull; try std.testing.expectEqualStrings("world", v.string.string); var v2 = bval.dict.dict.getPtr("hi") orelse return error.TestExpectedNotNull; try std.testing.expectEqualDeep(v2.*.list.list.items[0], BValue{ .int = 123 }); try std.testing.expectEqualDeep(v2.*.list.list.items[1], BValue{ .int = 456 }); try std.testing.expectEqualStrings("nest", v2.*.list.list.items[2].list.list.items[0].string.string); } test "round trip" { var a = std.testing.allocator; const in = "d5:hello5:world2:hili123ei456el4:nesteee"; var bval = try bdecodeBuf(a, in); defer bval.deinit(a); var bw = std.ArrayList(u8).init(a); defer bw.deinit(); var writer = bw.writer(); try bval.bencode(writer); var out = try bw.toOwnedSlice(); defer a.free(out); try std.testing.expectEqualStrings(in, out); }