In chapter 4, builtins were resolved at runtime by name. In the compiler/VM, we resolve them at compile time through the symbol table and execute them in the VM without creating a call frame.
object.zig already has Builtin and BuiltinFn, and builtins.zig already has all six builtin implementations from chapter 4. This chapter just wires them into the compiler and VM pipeline. Follow these steps in order.
Step 1: add the builtin registry to builtins.zig
#
The compiler needs to assign each builtin a stable numeric index. The tree-walking interpreter looked up builtins by name at runtime, but bytecode needs a fixed index baked into the instructions.
Add this ordered registry array to builtins.zig. The order must stay stable because the index is baked into bytecode. It must match the order of builtin_names:
/// Pairs the name a builtin is registered under with the function that
/// implements it.
pub const BuiltinDefinition = struct {
/// Identifier under which the builtin is registered.
name: []const u8,
/// Function invoked when the builtin is called.
func: object.BuiltinFn,
};
/// Ordered registry of builtins. An entry's position in this array is the
/// index the compiler assigns to the builtin and the VM later uses to look
/// it up again, so entries must not be reordered.
pub const builtin_definitions = [_]BuiltinDefinition{
.{ .name = "len", .func = &builtinLen },
.{ .name = "puts", .func = &builtinPuts },
.{ .name = "first", .func = &builtinFirst },
.{ .name = "last", .func = &builtinLast },
.{ .name = "rest", .func = &builtinRest },
.{ .name = "push", .func = &builtinPush },
};
The existing lookup function and builtin_names array remain unchanged. The evaluator (chapter 4) still uses them.
Step 2: add op_get_builtin to code.zig
#
Add this variant to the Opcode enum in code.zig:
// Loads a builtin function; takes one 1-byte operand (an index into
// builtin_definitions).
op_get_builtin,
Add this entry to the lookup function in code.zig:
// Exactly one operand, one byte wide.
.op_get_builtin => .{ .name = "OpGetBuiltin", .operand_widths = &[_]u8{1} },
The operand is a single byte (u8) holding the index into builtin_definitions, so at most 256 builtins can be addressed.
Step 3: update symbol_table.zig
#
Add builtin scope
#
Update the SymbolScope enum:
/// Scope a symbol was defined in. The compiler switches on this value to
/// choose which load/store opcode to emit for the symbol.
pub const SymbolScope = enum {
/// Read with op_get_global, written with op_set_global.
global,
/// Read with op_get_local, written with op_set_local.
local,
/// Builtin function; read with op_get_builtin.
builtin,
// free and function come in Chapter 13
};
Add defineBuiltin method
#
Add this method to the SymbolTable struct:
/// Register `name` as a builtin symbol carrying the caller-supplied `index`
/// and return the new symbol.
///
/// The symbol is stored in `self.store` under `name`. `num_definitions` is
/// left untouched, because a builtin does not take up a global or local
/// slot. Fails only if inserting into the store fails.
pub fn defineBuiltin(self: *SymbolTable, index: usize, name: []const u8) !Symbol {
    const builtin_symbol: Symbol = .{ .name = name, .scope = .builtin, .index = index };
    try self.store.put(name, builtin_symbol);
    return builtin_symbol;
}
Builtins are defined on the outermost (global) symbol table. Because resolve walks the outer chain, builtins are accessible from any scope depth. No special handling needed.
Step 4: update compiler.zig
#
Add builtins import #
Add this import to the top of compiler.zig:
const builtins = @import("builtins.zig");
Update init
#
Compiler.init previously returned Compiler (no error). It now returns !Compiler because defineBuiltin can fail. Update it to pre-define all builtins in the global symbol table at initialization:
/// Create a compiler with a single empty top-level compilation scope and a
/// global symbol table in which every entry of
/// `builtins.builtin_definitions` is pre-defined as a builtin, using the
/// entry's position in that array as the symbol index.
///
/// Returns an error if appending the initial scope or defining a builtin
/// fails. Whatever was acquired before the failure is released before the
/// error is returned.
pub fn init(allocator: std.mem.Allocator) !Compiler {
    var scopes: std.ArrayList(CompilationScope) = .empty;
    errdefer scopes.deinit(allocator);
    // Allocation can genuinely fail here; propagate it rather than
    // asserting it away with `catch unreachable`.
    try scopes.append(allocator, .{
        .instructions = .empty,
        .last_instruction = null,
        .previous_instruction = null,
    });

    var st = SymbolTable.init(allocator);
    errdefer st.deinit();

    // Pre-define all builtins in the global symbol table. The array
    // position becomes the symbol index.
    for (builtins.builtin_definitions, 0..) |def, i| {
        _ = try st.defineBuiltin(i, def.name);
    }

    return .{
        .constants = .empty,
        .allocator = allocator,
        .symbol_table = st,
        .scopes = scopes,
        .scope_index = 0,
    };
}
Update all call sites to use
try Compiler.init(...). This includes testVMRun in vm_test.zig and testCompile in compiler_test.zig.
Update loadSymbol
#
Add the .builtin case:
/// Emit the load instruction that matches the scope of `sym`, passing the
/// symbol's index as the instruction's only operand.
fn loadSymbol(self: *Compiler, sym: Symbol) CompileError!void {
    const operands = [_]usize{sym.index};
    switch (sym.scope) {
        .global => {
            _ = try self.emit(.op_get_global, &operands);
        },
        .local => {
            _ = try self.emit(.op_get_local, &operands);
        },
        .builtin => {
            _ = try self.emit(.op_get_builtin, &operands);
        },
    }
}
Update let_statement
#
Add the .builtin case to the scope switch. This branch is not expected to run, since define should only ever hand back a global or local symbol, but the exhaustive switch requires every scope to be covered:
.let_statement => |ls| {
    // Compile the right-hand side first so its value is what the store
    // instruction emitted below operates on.
    try self.compileExpression(ls.value);

    const defined = try self.symbol_table.define(ls.name);
    const operands = [_]usize{defined.index};
    switch (defined.scope) {
        .global => {
            _ = try self.emit(.op_set_global, &operands);
        },
        .local => {
            _ = try self.emit(.op_set_local, &operands);
        },
        // Present only to keep the switch exhaustive; nothing is emitted.
        .builtin => {},
    }
},
That’s all the compiler changes. When the compiler encounters an identifier like len, resolve finds it with .builtin scope, and loadSymbol emits op_get_builtin 0.
Step 5: update vm.zig
#
Add builtins import #
Add this import to the top of vm.zig:
const builtins = @import("builtins.zig");
Add op_get_builtin handler
#
Add this case to switch (op) in the run method:
.op_get_builtin => {
    // The byte after the opcode selects an entry of builtin_definitions.
    const operand = ins[ip + 1];
    frame.ip += 2;

    const index: usize = @intCast(operand);
    const entry = builtins.builtin_definitions[index];
    // Push the builtin as a value; it only runs once op_call reaches it.
    try self.push(.{ .builtin = .{ .func = entry.func } });
    continue;
},
Update op_call for builtins
#
Replace the op_call case in the run method. It now distinguishes between compiled functions and builtins. Builtins execute immediately without creating a new frame:
.op_call => {
    // One-byte operand: the number of arguments sitting on top of the
    // stack. The callee sits directly beneath them.
    const num_args: usize = @intCast(ins[ip + 1]);
    frame.ip += 2;

    const first_arg = self.sp - num_args;
    const callee = self.stack[first_arg - 1].?;
    switch (callee) {
        .compiled_function => |func| {
            if (func.num_parameters != num_args) {
                return error.WrongArgumentCount;
            }
            // The arguments become the first slots of the new frame.
            self.pushFrame(.{
                .fn_obj = func,
                .ip = 0,
                .base_pointer = first_arg,
            });
            self.sp = first_arg + func.num_locals;
            continue;
        },
        .builtin => |b| {
            // Stack slots are ?Object while builtins take []const Object,
            // so copy the unwrapped arguments into a scratch buffer.
            var scratch: [256]object.Object = undefined;
            const args = scratch[0..num_args];
            for (args, self.stack[first_arg..self.sp]) |*dst, slot| {
                dst.* = slot.?;
            }

            // Runs immediately; no call frame is created for a builtin.
            const outcome = b.func(self.allocator, args);

            // Drop the arguments and the callee, then push the result.
            self.sp = first_arg - 1;
            try self.push(outcome);
            continue;
        },
        else => return error.CallingNonFunction,
    }
},
Note: the builtin receives []const Object (not optional). We unwrap the ?Object stack values into a temporary buffer before calling.
Tests #
Symbol table test #
Add this test inline in symbol_table.zig.
test "define and resolve builtins" {
    const testing = std.testing;

    // Builtins are registered on the outermost table.
    var global = SymbolTable.init(testing.allocator);
    defer global.deinit();

    const names = [_][]const u8{ "a", "b", "c", "d" };
    for (names, 0..) |name, position| {
        _ = try global.defineBuiltin(position, name);
    }

    // Resolve through an enclosed table to check that lookups reach the
    // enclosing table.
    var local = SymbolTable.initEnclosed(testing.allocator, &global);
    defer local.deinit();

    const Expectation = struct { name: []const u8, index: usize };
    const expectations = [_]Expectation{
        .{ .name = "a", .index = 0 },
        .{ .name = "d", .index = 3 },
    };
    for (expectations) |want| {
        const got = local.resolve(want.name).?;
        try testing.expectEqual(SymbolScope.builtin, got.scope);
        try testing.expectEqual(want.index, got.index);
    }
}
VM tests #
Add this test to vm_test.zig.
test "builtin functions" {
    const testing = std.testing;
    const backing = testing.allocator;

    // Calls whose result is a plain integer.
    const IntegerCase = struct { input: []const u8, expected: i64 };
    const integer_cases = [_]IntegerCase{
        .{ .input = "len(\"\")", .expected = 0 },
        .{ .input = "len(\"four\")", .expected = 4 },
        .{ .input = "len(\"hello world\")", .expected = 11 },
        .{ .input = "len([1, 2, 3])", .expected = 3 },
        .{ .input = "len([])", .expected = 0 },
        .{ .input = "first([1, 2, 3])", .expected = 1 },
        .{ .input = "last([1, 2, 3])", .expected = 3 },
    };
    for (integer_cases) |case| {
        var arena = std.heap.ArenaAllocator.init(backing);
        defer arena.deinit();

        const value = try testVMRun(&arena, case.input);
        try testing.expectEqual(case.expected, value.integer.value);
    }

    // A builtin referenced from inside a function body.
    {
        var arena = std.heap.ArenaAllocator.init(backing);
        defer arena.deinit();

        const value = try testVMRun(&arena,
            \\let addLen = fn(arr) { len(arr) + 1; };
            \\addLen([1, 2, 3]);
        );
        try testing.expectEqual(@as(i64, 4), value.integer.value);
    }

    // rest: every element except the first.
    {
        var arena = std.heap.ArenaAllocator.init(backing);
        defer arena.deinit();

        const value = try testVMRun(&arena, "rest([1, 2, 3])");
        const elements = value.array.elements;
        try testing.expectEqual(@as(usize, 2), elements.len);
        try testing.expectEqual(@as(i64, 2), elements[0].integer.value);
        try testing.expectEqual(@as(i64, 3), elements[1].integer.value);
    }

    // push: the new element ends up last.
    {
        var arena = std.heap.ArenaAllocator.init(backing);
        defer arena.deinit();

        const value = try testVMRun(&arena, "push([1, 2], 3)");
        const elements = value.array.elements;
        try testing.expectEqual(@as(usize, 3), elements.len);
        try testing.expectEqual(@as(i64, 3), elements[2].integer.value);
    }

    // An unsupported argument comes back as an error object.
    {
        var arena = std.heap.ArenaAllocator.init(backing);
        defer arena.deinit();

        const value = try testVMRun(&arena, "len(1)");
        try testing.expectEqualStrings("argument to `len` not supported", value.err.message);
    }
}
Verify it works #
Run zig build test. No output means all tests passed.
In chapter 13 we add closures, the final and most conceptually challenging feature.