Hello Bytecode

On this page

Table of Contents

This chapter builds the bytecode foundation: opcode definitions and encoding (code.zig), a minimal compiler that walks the AST and emits bytecode (compiler.zig), and a stack machine that executes it (vm.zig). By the end, 1 + 2 compiles to bytecode and the VM evaluates it to 3.

Bytecode format (`code.zig`)
#

Opcodes
#

Each variant maps directly to a byte value:

const std = @import("std");

pub const Opcode = enum(u8) {
    op_constant = 0,
    op_add = 1,
    op_pop = 2,
    // More opcodes added in later chapters:
    // op_sub,
    // op_mul,
    // op_div,
    // op_true,
    // op_false,
    // op_equal,
    // op_not_equal,
    // op_greater_than,
    // op_minus,
    // op_bang,
    // op_jump_not_truthy,
    // op_jump,
    // op_null,
    // op_set_global,
    // op_get_global,
    // op_set_local,
    // op_get_local,
    // op_array,
    // op_hash,
    // op_index,
    // op_call,
    // op_return_value,
    // op_return,
    // op_get_builtin,
    // op_closure,
    // op_get_free,
    // op_current_closure,
};

pub const Definition = struct {
    name: []const u8,
    operand_widths: []const u8,
};

/// Returns the definition for an opcode: its name and operand widths.
pub fn lookup(op: Opcode) Definition {
    return switch (op) {
        .op_constant => .{ .name = "OpConstant", .operand_widths = &[_]u8{2} },
        .op_add => .{ .name = "OpAdd", .operand_widths = &[_]u8{} },
        .op_pop => .{ .name = "OpPop", .operand_widths = &[_]u8{} },
    };
}

op_constant has one operand: a 2-byte (u16) index into the constant pool. op_add and op_pop have no operands.

Encoding instructions: `make`
#

Add this function to code.zig. It encodes an opcode and its operands into a byte slice.

/// Encodes an instruction: opcode + operands -> byte slice.
pub fn make(allocator: std.mem.Allocator, op: Opcode, operands: []const usize) ![]u8 {
    const def = lookup(op);

    var instruction_len: usize = 1; // opcode byte
    for (def.operand_widths) |w| instruction_len += w;

    var instruction = try allocator.alloc(u8, instruction_len);
    instruction[0] = @intFromEnum(op);

    var offset: usize = 1;
    for (def.operand_widths, 0..) |width, i| {
        switch (width) {
            2 => std.mem.writeInt(u16, instruction[offset..][0..2], @intCast(operands[i]), .big),
            1 => instruction[offset] = @intCast(operands[i]),
            else => {},
        }
        offset += width;
    }

    return instruction;
}

Decoding operands: `readOperands`
#

Add this function to code.zig. It decodes operands from a raw instruction stream.

/// Decodes the operands of an instruction.
pub fn readOperands(def: Definition, instructions: []const u8) struct { operands: [8]usize, bytes_read: usize } {
    var operands: [8]usize = [_]usize{0} ** 8;
    var offset: usize = 0;

    for (def.operand_widths, 0..) |width, i| {
        switch (width) {
            2 => operands[i] = std.mem.readInt(u16, instructions[offset..][0..2], .big),
            1 => operands[i] = instructions[offset],
            else => {},
        }
        offset += width;
    }

    return .{ .operands = operands, .bytes_read = offset };
}

Disassembler
#

Add this function to code.zig. This is useful for debugging. It turns raw bytecode into human-readable text.

/// Produces a human-readable disassembly of bytecode instructions.
pub fn disassemble(allocator: std.mem.Allocator, instructions: []const u8) ![]const u8 {
    var buf: std.ArrayList(u8) = .empty;
    const writer = buf.writer(allocator);

    var i: usize = 0;
    while (i < instructions.len) {
        const op: Opcode = @enumFromInt(instructions[i]);
        const def = lookup(op);

        try writer.print("{d:0>4} {s}", .{ i, def.name });

        const result = readOperands(def, instructions[i + 1 ..]);
        for (def.operand_widths, 0..) |_, j| {
            try writer.print(" {d}", .{result.operands[j]});
        }
        try writer.print("\n", .{});

        i += 1 + result.bytes_read;
    }

    return buf.toOwnedSlice(allocator);
}

Example output:

0000 OpConstant 0
0003 OpConstant 1
0006 OpAdd
0007 OpPop

Tests for `code.zig`
#

test "make" {
    const allocator = std.testing.allocator;

    const tests = [_]struct {
        op: Opcode,
        operands: []const usize,
        expected: []const u8,
    }{
        .{
            .op = .op_constant,
            .operands = &[_]usize{65534},
            .expected = &[_]u8{ @intFromEnum(Opcode.op_constant), 0xFF, 0xFE },
        },
        .{
            .op = .op_add,
            .operands = &[_]usize{},
            .expected = &[_]u8{@intFromEnum(Opcode.op_add)},
        },
    };

    for (tests) |tt| {
        const result = try make(allocator, tt.op, tt.operands);
        defer allocator.free(result);
        try std.testing.expectEqualSlices(u8, tt.expected, result);
    }
}

test "read operands" {
    const tests = [_]struct {
        op: Opcode,
        operands_input: []const usize,
        bytes_read: usize,
    }{
        .{ .op = .op_constant, .operands_input = &[_]usize{65535}, .bytes_read = 2 },
    };

    const allocator = std.testing.allocator;
    for (tests) |tt| {
        const instruction = try make(allocator, tt.op, tt.operands_input);
        defer allocator.free(instruction);
        const def = lookup(tt.op);
        const result = readOperands(def, instruction[1..]);
        try std.testing.expectEqual(tt.bytes_read, result.bytes_read);
        for (tt.operands_input, 0..) |expected, i| {
            try std.testing.expectEqual(expected, result.operands[i]);
        }
    }
}

The compiler (`compiler.zig`)
#

Bytecode output struct
#

const std = @import("std");
const ast = @import("ast.zig");
const code = @import("code.zig");
const object = @import("object.zig");

pub const Bytecode = struct {
    instructions: []const u8,
    constants: []const object.Object,
};

Compiler struct
#

pub const Compiler = struct {
    instructions: std.ArrayList(u8),
    constants: std.ArrayList(object.Object),
    allocator: std.mem.Allocator,

    const CompileError = std.mem.Allocator.Error || error{ UnknownOperator, UndefinedVariable };

    pub fn init(allocator: std.mem.Allocator) Compiler {
        return .{
            .instructions = .empty,
            .constants = .empty,
            .allocator = allocator,
        };
    }

    /// Returns the compiled bytecode.
    pub fn bytecode(self: *Compiler) Bytecode {
        return .{
            .instructions = self.instructions.items,
            .constants = self.constants.items,
        };
    }

    /// Adds a constant to the pool and returns its index.
    fn addConstant(self: *Compiler, obj: object.Object) !usize {
        try self.constants.append(self.allocator, obj);
        return self.constants.items.len - 1;
    }

    /// Emits an instruction and returns its position in the bytecode.
    fn emit(self: *Compiler, op: code.Opcode, operands: []const usize) !usize {
        const instruction = try code.make(self.allocator, op, operands);
        defer self.allocator.free(instruction);
        const pos = self.instructions.items.len;
        try self.instructions.appendSlice(self.allocator, instruction);
        return pos;
    }

    // Compilation entry points.

    pub fn compile(self: *Compiler, program: ast.Program) CompileError!void {
        for (program.statements) |stmt| {
            try self.compileStatement(stmt);
        }
    }

    fn compileStatement(self: *Compiler, stmt: ast.Statement) CompileError!void {
        switch (stmt) {
            .expression_statement => |es| {
                try self.compileExpression(es.expression);
                _ = try self.emit(.op_pop, &[_]usize{});
            },
            .let_statement => {}, // Chapter 9
            .return_statement => {}, // Chapter 11
        }
    }

    fn compileExpression(self: *Compiler, expr: ast.Expression) CompileError!void {
        switch (expr) {
            .integer_literal => |il| {
                const int_obj = object.Object{ .integer = .{ .value = il.value } };
                const const_idx = try self.addConstant(int_obj);
                _ = try self.emit(.op_constant, &[_]usize{const_idx});
            },
            .infix => |inf| {
                try self.compileExpression(inf.left.*);
                try self.compileExpression(inf.right.*);

                if (std.mem.eql(u8, inf.operator, "+")) {
                    _ = try self.emit(.op_add, &[_]usize{});
                } else {
                    return error.UnknownOperator;
                }
            },
            else => {}, // Other expressions added in later chapters
        }
    }
};

How it works
#

For the input 1 + 2:

Parser produces: ExpressionStatement(Infix(IntegerLiteral(1), "+", IntegerLiteral(2)))
Compiler visits ExpressionStatement → calls compileExpression → emits OpPop after
compileExpression sees Infix → compiles left (1), compiles right (2), emits OpAdd
Compiling IntegerLiteral(1): adds Integer{1} to constants at index 0, emits OpConstant 0
Compiling IntegerLiteral(2): adds Integer{2} to constants at index 1, emits OpConstant 1

Result:

Constants: [Integer(1), Integer(2)]
Bytecode:  OpConstant 0, OpConstant 1, OpAdd, OpPop

The vm (`vm.zig`)
#

VM struct
#

const std = @import("std");
const code = @import("code.zig");
const object = @import("object.zig");
const compiler = @import("compiler.zig");

const STACK_SIZE = 2048;

pub const VM = struct {
    constants: []const object.Object,
    instructions: []const u8,
    stack: [STACK_SIZE]?object.Object,
    sp: usize, // Stack pointer: always points to the NEXT free slot.
               // stack[sp - 1] is the top element.

    pub fn init(bc: compiler.Bytecode) VM {
        return .{
            .constants = bc.constants,
            .instructions = bc.instructions,
            .stack = [_]?object.Object{null} ** STACK_SIZE,
            .sp = 0,
        };
    }

    /// Returns the element on top of the stack (without popping).
    pub fn stackTop(self: *VM) ?object.Object {
        if (self.sp == 0) return null;
        return self.stack[self.sp - 1];
    }

    /// Returns the last element that was popped off the stack.
    /// Used to get the result of expression statements (which emit OpPop).
    pub fn lastPoppedStackElem(self: *VM) ?object.Object {
        return self.stack[self.sp];
    }

    fn push(self: *VM, obj: object.Object) !void {
        if (self.sp >= STACK_SIZE) return error.StackOverflow;
        self.stack[self.sp] = obj;
        self.sp += 1;
    }

    fn pop(self: *VM) object.Object {
        const obj = self.stack[self.sp - 1].?;
        self.sp -= 1;
        return obj;
    }

    /// The fetch-decode-execute loop.
    pub fn run(self: *VM) !void {
        var ip: usize = 0;

        while (ip < self.instructions.len) {
            const op: code.Opcode = @enumFromInt(self.instructions[ip]);

            switch (op) {
                .op_constant => {
                    const const_idx = std.mem.readInt(
                        u16,
                        self.instructions[ip + 1 ..][0..2],
                        .big,
                    );
                    ip += 3; // 1 opcode + 2 operand bytes
                    try self.push(self.constants[const_idx]);
                },
                .op_add => {
                    const right = self.pop();
                    const left = self.pop();
                    const result = left.integer.value + right.integer.value;
                    try self.push(.{ .integer = .{ .value = result } });
                    ip += 1;
                },
                .op_pop => {
                    _ = self.pop();
                    ip += 1;
                },
            }
        }
    }
};

IP management
#

Each switch case manages its own ip advancement. This is cleaner than incrementing at the end of the loop because jump instructions (chapter 8) set ip directly. op_constant advances by 3 (1 opcode + 2 operand bytes), while op_add and op_pop advance by 1 (no operands).

`lastPoppedStackElem`
#

Expression statements emit OpPop, which removes the result from the stack. But we still want to show it in the REPL. lastPoppedStackElem() returns self.stack[self.sp], the slot that was just freed by pop(). The value is still there in memory; we just decremented sp past it.

Step 4: create `vm_test.zig`
#

Create src/vm_test.zig. These tests import the lexer, parser, and compiler, so they belong in a separate file rather than inline in vm.zig. The testVMRun helper parses, compiles, and runs Monkey code through the full pipeline. You’ll reuse it in every compiler chapter from here on.

Start the file with these imports:

const std = @import("std");
const object = @import("object.zig");
const compiler = @import("compiler.zig");
const VM = @import("vm.zig").VM;
const Lexer = @import("lexer.zig").Lexer;
const Parser = @import("parser.zig").Parser;

/// Parses, compiles, and runs Monkey source code through the VM.
/// Returns the last value popped from the stack (the result of the last expression statement).
/// Reused in all subsequent compiler chapters.
fn testVMRun(arena: *std.heap.ArenaAllocator, input: []const u8) !object.Object {
    const alloc = arena.allocator();

    var l = Lexer.init(input);
    var p = Parser.init(alloc, &l);
    const program = try p.parseProgram();

    var comp = compiler.Compiler.init(alloc);
    try comp.compile(program);

    var machine = VM.init(comp.bytecode());
    try machine.run();

    return machine.lastPoppedStackElem() orelse error.NoResult;
}

test "integer arithmetic" {
    const allocator = std.testing.allocator;

    const tests = [_]struct { input: []const u8, expected: i64 }{
        .{ .input = "1", .expected = 1 },
        .{ .input = "2", .expected = 2 },
        .{ .input = "1 + 2", .expected = 3 },
    };

    for (tests) |tt| {
        var arena = std.heap.ArenaAllocator.init(allocator);
        defer arena.deinit();
        const result = try testVMRun(&arena, tt.input);
        try std.testing.expectEqual(tt.expected, result.integer.value);
    }
}

Verify it works
#

Make sure you’ve uncommented "src/code.zig" and "src/vm_test.zig" in build.zig, then run zig build test. No output means all tests passed.

In chapter 7 we extend the compiler and VM to handle all arithmetic operators, booleans, comparisons, and prefix expressions.

Bytecode format (code.zig) #

Opcodes #

Encoding instructions: make #

Decoding operands: readOperands #

Disassembler #

Tests for code.zig #

The compiler (compiler.zig) #

Bytecode output struct #

Compiler struct #

How it works #

The vm (vm.zig) #

VM struct #

IP management #

lastPoppedStackElem #

Step 4: create vm_test.zig #

Verify it works #