We extend the compiler and VM to handle composite data types. Strings are stored as constants (like integers). Arrays and hashes are built at runtime from values on the stack. Follow these steps in order.
Step 1: update code.zig
#
Add these three variants to the Opcode enum in code.zig:
// Opcodes for composite values (arrays, hashes) and the index operator.
op_array = 18, // operand: u16 count (number of elements)
op_hash = 19, // operand: u16 count (number of key+value items, so count/2 pairs)
op_index = 20, // no operands; the VM pops the index, then the indexed value, and pushes the result
Add these entries to the lookup function in code.zig:
// OpArray and OpHash each carry one two-byte operand (the item count); OpIndex carries none.
.op_array => .{ .name = "OpArray", .operand_widths = &[_]u8{2} },
.op_hash => .{ .name = "OpHash", .operand_widths = &[_]u8{2} },
.op_index => .{ .name = "OpIndex", .operand_widths = &[_]u8{} },
Step 2: update compiler.zig
#
All compiler code blocks in this section are new cases to add to compileExpression in compiler.zig.
Strings #
Strings are stored in the constant pool, just like integers. Add this case to compileExpression:
.string_literal => |literal| {
    // Wrap the literal's text in a string object, store it in the constant
    // pool, and emit an OpConstant that refers to the new pool slot.
    const pool_slot = try self.addConstant(object.Object{
        .string = .{ .value = literal.value },
    });
    _ = try self.emit(.op_constant, &[_]usize{pool_slot});
},
Step 3: update vm.zig
#
String concatenation #
Replace the executeBinaryOperation method on the VM struct (from chapter 7) with this version that adds string concatenation:
/// Pops the right operand, then the left operand, and applies `op` to them.
///
/// Two integers are forwarded to `executeBinaryIntegerOperation`. Two strings
/// combined with `op_add` are concatenated into a buffer allocated from
/// `self.allocator`, and the resulting string object is pushed. Any other
/// operand/opcode combination returns `error.UnsupportedTypes`.
fn executeBinaryOperation(self: *VM, op: code.Opcode) !void {
    const rhs = self.pop();
    const lhs = self.pop();

    if (lhs == .integer and rhs == .integer) {
        return self.executeBinaryIntegerOperation(op, lhs.integer.value, rhs.integer.value);
    }

    // The only non-integer operation supported is string + string.
    const is_concat = lhs == .string and rhs == .string and op == .op_add;
    if (!is_concat) return error.UnsupportedTypes;

    const joined = try std.mem.concat(
        self.allocator,
        u8,
        &[_][]const u8{ lhs.string.value, rhs.string.value },
    );
    try self.push(.{ .string = .{ .value = joined } });
}
The VM now needs an allocator field for string concatenation and array/hash construction. Add allocator as a new field on the VM struct and update init to accept it. Note that init now takes allocator as its first parameter:
/// Virtual machine state: the bytecode being executed plus the value stack,
/// the global slots, and the allocator used for runtime-built values.
pub const VM = struct {
    /// Constant pool taken from the compiled bytecode.
    constants: []const object.Object,
    /// Raw instruction bytes taken from the compiled bytecode.
    instructions: []const u8,
    /// Value stack; every slot starts out as null.
    stack: [STACK_SIZE]?object.Object,
    /// Stack pointer; starts at 0.
    sp: usize,
    /// Global slots; every slot starts out as null.
    globals: [GLOBALS_SIZE]?object.Object,
    /// Used for string concatenation and array/hash construction.
    allocator: std.mem.Allocator, // NEW

    /// Builds a VM for `bc` with an empty stack and empty globals.
    /// `allocator` is stored on the VM for later runtime allocations.
    pub fn init(allocator: std.mem.Allocator, bc: compiler.Bytecode) VM {
        const cleared_stack = [_]?object.Object{null} ** STACK_SIZE;
        const cleared_globals = [_]?object.Object{null} ** GLOBALS_SIZE;
        return VM{
            .allocator = allocator,
            .constants = bc.constants,
            .instructions = bc.instructions,
            .sp = 0,
            .stack = cleared_stack,
            .globals = cleared_globals,
        };
    }
};
Since VM.init now takes an allocator, update the VM.init call in testVMRun (in vm_test.zig) to pass one. The VM allocates strings, arrays, and hashes at runtime; the arena (passed by the caller) ensures they are bulk-freed when each test completes:
var machine = VM.init(alloc, comp.bytecode());
Arrays #
How arrays compile #
[1, 2 + 3, 4 * 5] compiles to:
OpConstant 0 // push 1
OpConstant 1 // push 2
OpConstant 2 // push 3
OpAdd // 2 + 3 = 5
OpConstant 3 // push 4
OpConstant 4 // push 5
OpMul // 4 * 5 = 20
OpArray 3 // pop 3 elements, build array [1, 5, 20]
The elements are pushed onto the stack in order. OpArray knows how many to pop (the operand), collects them, and pushes the resulting array.
Arrays (compiler) #
Add this case to compileExpression in compiler.zig:
.array_literal => |literal| {
    // Compile the elements in source order so their values are pushed
    // onto the stack in that same order.
    for (literal.elements) |element| {
        try self.compileExpression(element);
    }
    // The operand tells OpArray how many stack values to collect.
    const element_count = literal.elements.len;
    _ = try self.emit(.op_array, &[_]usize{element_count});
},
Arrays (vm) #
Add this case to switch (op) in the run method on the VM struct in vm.zig:
.op_array => {
    // The two bytes after the opcode hold the element count (big-endian u16).
    const operand = self.instructions[ip + 1 ..][0..2];
    const len = std.mem.readInt(u16, operand, .big);
    ip += 3;

    // The elements occupy stack[base .. sp]; copy them into a fresh slice.
    const base = self.sp - len;
    const items = try self.allocator.alloc(object.Object, len);
    for (items, self.stack[base..self.sp]) |*dst, slot| {
        dst.* = slot.?;
    }

    // Drop the elements from the stack, then push the array in their place.
    self.sp = base;
    try self.push(.{ .array = .{ .elements = items } });
},
The elements sit on the stack from stack[sp - count] to stack[sp - 1]. We copy them into a new slice, reset sp, and push the array.
Hashes #
How hashes compile #
{1: 2, 3: 4} compiles to:
OpConstant 0 // push key 1
OpConstant 1 // push value 2
OpConstant 2 // push key 3
OpConstant 3 // push value 4
OpHash 4 // pop 4 items (2 pairs), build hash
The operand to OpHash is the total number of items (keys + values), not the number of pairs. So 2 pairs = 4 items.
Hashes (compiler) #
Add this case to compileExpression in compiler.zig:
.hash_literal => |literal| {
    // Each pair is compiled key first, then value, so the stack ends up
    // holding key, value, key, value, ...
    for (literal.pairs) |entry| {
        try self.compileExpression(entry.key);
        try self.compileExpression(entry.value);
    }
    // The operand counts individual stack items, i.e. two per pair.
    const item_count = literal.pairs.len * 2;
    _ = try self.emit(.op_hash, &[_]usize{item_count});
},
Hashes (vm) #
Add this case to switch (op) in the run method in vm.zig:
.op_hash => {
    // The two bytes after the opcode hold the number of stack items
    // (keys plus values) as a big-endian u16.
    const count = std.mem.readInt(u16, self.instructions[ip + 1 ..][0..2], .big);
    ip += 3;

    var pairs = std.AutoHashMap(object.HashKey, object.HashPair).init(self.allocator);
    // Free the partially built map if a key turns out to be unhashable,
    // an insert fails, or the final push fails. On success the map's
    // storage is handed over to the pushed hash object.
    errdefer pairs.deinit();

    // Items sit on the stack as key, value, key, value, ... in stack[start .. sp].
    const start = self.sp - count;
    var i: usize = start;
    while (i < self.sp) : (i += 2) {
        const key = self.stack[i].?;
        const value = self.stack[i + 1].?;
        const hash_key = key.hashKey() orelse return error.UnhashableKey;
        try pairs.put(hash_key, .{ .key = key, .value = value });
    }

    self.sp = start; // pop all items
    try self.push(.{ .hash = .{ .pairs = pairs } });
},
Index operator #
Index (compiler) #
Add this case to compileExpression in compiler.zig:
.index_expression => |expr| {
    // Push the value being indexed first, then the index, so the VM
    // pops them in the opposite order.
    try self.compileExpression(expr.left.*);
    try self.compileExpression(expr.index.*);

    // OpIndex takes no operands.
    _ = try self.emit(.op_index, &[_]usize{});
},
Index (vm) #
Add this case to switch (op) in the run method in vm.zig:
.op_index => {
    ip += 1;

    // The index was pushed last, so it comes off the stack first.
    const index = self.pop();
    const left = self.pop();

    switch (left) {
        // Arrays can only be indexed with integers.
        .array => {
            if (index != .integer) return error.IndexNotSupported;
            try self.executeArrayIndex(left, index);
        },
        // Hash lookups validate the key themselves.
        .hash => try self.executeHashIndex(left, index),
        else => return error.IndexNotSupported,
    }
},
Add this method to the VM struct in vm.zig:
/// Pushes the element of `array` at position `index`.
///
/// A negative index or one at or past the end of the array pushes the
/// null object instead of raising an error. The caller has already checked
/// that `array` is an array and `index` is an integer.
fn executeArrayIndex(self: *VM, array: object.Object, index: object.Object) !void {
    const items = array.array.elements;

    // `cast` yields null for negative values, covering the lower bound.
    if (std.math.cast(usize, index.integer.value)) |slot| {
        if (slot < items.len) {
            return self.push(items[slot]);
        }
    }

    return self.push(.{ .null = .{} });
}
Add this method to the VM struct in vm.zig:
/// Looks `index` up in `hash_obj` and pushes the stored value.
///
/// A key that is not present pushes the null object. An index that cannot
/// be used as a hash key returns `error.UnhashableKey`.
fn executeHashIndex(self: *VM, hash_obj: object.Object, index: object.Object) !void {
    const lookup_key = index.hashKey() orelse return error.UnhashableKey;

    if (hash_obj.hash.pairs.get(lookup_key)) |entry| {
        try self.push(entry.value);
    } else {
        try self.push(.{ .null = .{} });
    }
}
Out-of-bounds array access and missing hash keys both push .{ .null = .{} } (not an error). This matches Monkey semantics.
Tests #
All tests below go in vm_test.zig (they use testVMRun, which depends on the lexer, parser, and compiler).
String tests #
// String literals and `+` concatenation evaluate to the expected text.
test "string expressions" {
    const Case = struct { input: []const u8, expected: []const u8 };
    const cases = [_]Case{
        .{ .input = "\"monkey\"", .expected = "monkey" },
        .{ .input = "\"mon\" + \"key\"", .expected = "monkey" },
        .{ .input = "\"mon\" + \"key\" + \"banana\"", .expected = "monkeybanana" },
    };

    for (cases) |case| {
        // A fresh arena per case; everything is released when the iteration ends.
        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
        defer arena.deinit();

        const value = try testVMRun(&arena, case.input);
        try std.testing.expectEqualStrings(case.expected, value.string.value);
    }
}
// Array literals evaluate to arrays holding the expected integers, in order.
test "array literals" {
    const cases = [_]struct { input: []const u8, expected: []const i64 }{
        // Empty array
        .{ .input = "[]", .expected = &[_]i64{} },
        // Array of integer literals
        .{ .input = "[1, 2, 3]", .expected = &[_]i64{ 1, 2, 3 } },
        // Array with computed values
        .{ .input = "[1 + 2, 3 * 4, 5 + 6]", .expected = &[_]i64{ 3, 12, 11 } },
    };

    for (cases) |tt| {
        // A fresh arena per case, matching the other VM tests.
        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
        defer arena.deinit();

        const result = try testVMRun(&arena, tt.input);
        try std.testing.expect(result == .array);

        // Check the length first so a short or long result fails cleanly
        // instead of indexing out of bounds below.
        const elements = result.array.elements;
        try std.testing.expectEqual(tt.expected.len, elements.len);

        for (tt.expected, elements) |want, got| {
            try std.testing.expectEqual(want, got.integer.value);
        }
    }
}
// Indexing arrays and hashes yields the stored integer, or null when the
// index is out of bounds or the key is missing.
test "index expressions" {
    // `expected == null` means the program must evaluate to the null object.
    const Case = struct { input: []const u8, expected: ?i64 };
    const cases = [_]Case{
        .{ .input = "[1, 2, 3][1]", .expected = 2 },
        .{ .input = "[1, 2, 3][0 + 2]", .expected = 3 },
        .{ .input = "[[1, 1, 1]][0][0]", .expected = 1 },
        .{ .input = "{1: 1, 2: 2}[1]", .expected = 1 },
        .{ .input = "{1: 1, 2: 2}[2]", .expected = 2 },
        // Out of bounds / missing key return null
        .{ .input = "[][0]", .expected = null },
        .{ .input = "[1, 2, 3][99]", .expected = null },
        .{ .input = "[1][-1]", .expected = null },
        .{ .input = "{1: 1}[0]", .expected = null },
    };

    for (cases) |case| {
        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
        defer arena.deinit();

        const value = try testVMRun(&arena, case.input);
        if (case.expected) |want| {
            try std.testing.expectEqual(want, value.integer.value);
        } else {
            try std.testing.expect(value == .null);
        }
    }
}
Verify it works #
Run zig build test. No output means all tests passed.
In chapter 11 we add functions, the most complex feature so far. It requires compiled function objects, call frames, local variables, and arguments.