Extending the Interpreter

On this page

Table of Contents

Almost done with part 1. This chapter completes the interpreter by adding built-in functions (builtins.zig), wiring them into the evaluator, and building the REPL (main.zig).

Step 1: create `builtins.zig`
#

Create src/builtins.zig. This module defines the six Monkey built-in functions: len, first, last, rest, push, puts.

const std = @import("std");
const object = @import("object.zig");

/// Type of a built-in function, re-exported from object.zig.
/// The lookup table in this file stores its entries as values of this type.
pub const BuiltinFn = object.BuiltinFn;

/// Compile-time table mapping each builtin's Monkey-level name to its
/// implementation in this file.
const builtin_table = std.StaticStringMap(BuiltinFn).initComptime(.{
    .{ "len", &builtinLen },
    .{ "first", &builtinFirst },
    .{ "last", &builtinLast },
    .{ "rest", &builtinRest },
    .{ "push", &builtinPush },
    .{ "puts", &builtinPuts },
});

/// Resolves `name` to the matching built-in function.
/// Returns `null` when no builtin is registered under that name.
pub fn lookup(name: []const u8) ?BuiltinFn {
    return builtin_table.get(name);
}

/// Ordered list of the builtin names.
/// Used by the compiler to assign indices to builtins, so the position of
/// each name in this array is significant.
/// NOTE(review): presumably reordering entries changes the indices the
/// compiler hands out — confirm before editing the order.
pub const builtin_names = [_][]const u8{ "len", "puts", "first", "last", "rest", "push" };

/// `len(x)`: yields the length of a string or the element count of an array
/// as an integer object.
/// Produces an error object when the argument count is not exactly one or
/// when the argument is neither a string nor an array.
fn builtinLen(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    _ = allocator; // nothing is allocated here
    if (args.len != 1) {
        return .{ .err = .{ .message = "wrong number of arguments to `len`" } };
    }

    const count = switch (args[0]) {
        .string => |str| str.value.len,
        .array => |arr| arr.elements.len,
        else => return .{ .err = .{ .message = "argument to `len` not supported" } },
    };

    return .{ .integer = .{ .value = @intCast(count) } };
}

/// `first(array)`: yields the first element of the array, or the null
/// object when the array is empty.
/// Produces an error object when the argument count is not exactly one or
/// when the argument is not an array.
fn builtinFirst(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    _ = allocator; // nothing is allocated here
    if (args.len != 1) {
        return .{ .err = .{ .message = "wrong number of arguments to `first`" } };
    }

    const items = switch (args[0]) {
        .array => |arr| arr.elements,
        else => return .{ .err = .{ .message = "argument to `first` must be ARRAY" } },
    };

    if (items.len == 0) {
        return .{ .null = .{} };
    }
    return items[0];
}

/// `last(array)`: yields the final element of the array, or the null object
/// when the array is empty.
/// Produces an error object when the argument count is not exactly one or
/// when the argument is not an array.
fn builtinLast(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    _ = allocator; // nothing is allocated here
    if (args.len != 1) {
        return .{ .err = .{ .message = "wrong number of arguments to `last`" } };
    }

    const items = switch (args[0]) {
        .array => |arr| arr.elements,
        else => return .{ .err = .{ .message = "argument to `last` must be ARRAY" } },
    };

    if (items.len == 0) {
        return .{ .null = .{} };
    }
    return items[items.len - 1];
}

/// `rest(array)`: yields a newly allocated array holding every element
/// except the first. An empty input array yields the null object.
/// Produces an error object when the argument count is not exactly one,
/// when the argument is not an array, or when the copy cannot be allocated.
fn builtinRest(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    if (args.len != 1) {
        return .{ .err = .{ .message = "wrong number of arguments to `rest`" } };
    }

    const items = switch (args[0]) {
        .array => |arr| arr.elements,
        else => return .{ .err = .{ .message = "argument to `rest` must be ARRAY" } },
    };

    if (items.len == 0) {
        return .{ .null = .{} };
    }

    // Copy the tail so the result does not alias the input array's storage.
    const tail = allocator.dupe(object.Object, items[1..]) catch {
        return .{ .err = .{ .message = "allocation failed in `rest`" } };
    };
    return .{ .array = .{ .elements = tail } };
}

/// `push(array, value)`: yields a newly allocated array containing the
/// elements of `array` followed by `value`. The input array is not modified.
/// Produces an error object when the argument count is not exactly two,
/// when the first argument is not an array, or when allocation fails.
fn builtinPush(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    if (args.len != 2) {
        return .{ .err = .{ .message = "wrong number of arguments to `push`" } };
    }

    const items = switch (args[0]) {
        .array => |arr| arr.elements,
        else => return .{ .err = .{ .message = "first argument to `push` must be ARRAY" } },
    };

    const old_len = items.len;
    const grown = allocator.alloc(object.Object, old_len + 1) catch {
        return .{ .err = .{ .message = "allocation failed in `push`" } };
    };

    // Existing elements first, then the pushed value in the final slot.
    @memcpy(grown[0..old_len], items);
    grown[old_len] = args[1];

    return .{ .array = .{ .elements = grown } };
}

/// `puts(...)`: prints the `inspect` text of every argument on its own line
/// via `std.debug.print`, then yields the null object.
/// Produces an error object as soon as an argument's `inspect` call fails;
/// arguments before the failing one have already been printed.
fn builtinPuts(allocator: std.mem.Allocator, args: []const object.Object) object.Object {
    for (args) |arg| {
        if (arg.inspect(allocator)) |text| {
            std.debug.print("{s}\n", .{text});
        } else |_| {
            return .{ .err = .{ .message = "allocation failed in `puts`" } };
        }
    }

    return .{ .null = .{} };
}

Step 2: update `evaluator.zig`
#

Now that builtins.zig exists, update evaluator.zig to import it and use it for identifier resolution. Your imports in evaluator.zig should now look like:

const std = @import("std");
const ast = @import("ast.zig");
const object = @import("object.zig");
const Environment = @import("environment.zig").Environment;
const builtins = @import("builtins.zig");

Step 2a: update `evalIdentifier`
#

In chapter 3, evalIdentifier only checked the environment. Now add the builtins lookup as a fallback:

/// Resolves an identifier to a value.
/// The environment is consulted first and the builtin table second, so a
/// binding in the environment takes precedence over a builtin of the same
/// name. If neither has the name, the result of `newError` with an
/// "identifier not found" message is returned.
fn evalIdentifier(allocator: std.mem.Allocator, id: ast.Identifier, env: *Environment) EvalError!object.Object {
    const name = id.value;

    if (env.get(name)) |bound| {
        return bound;
    }

    if (builtins.lookup(name)) |builtin_fn| {
        return .{ .builtin = .{ .func = builtin_fn } };
    }

    return newError(allocator, "identifier not found: {s}", .{name});
}

The lookup order matters: environment first, then builtins. This means a user can shadow a builtin with let len = 5;.

Step 2b: add the builtin tests
#

Add these tests to evaluator_test.zig. They use the same testEval helper from chapter 3:

// Table-driven test for the `len` builtin. Each row is evaluated in its own
// arena and expects either an integer result or an error message.
test "builtin len" {
    const allocator = std.testing.allocator;

    const tests = [_]struct { input: []const u8, expected_int: ?i64, expected_err: ?[]const u8 }{
        .{ .input = "len(\"\")", .expected_int = 0, .expected_err = null },
        .{ .input = "len(\"four\")", .expected_int = 4, .expected_err = null },
        .{ .input = "len(\"hello world\")", .expected_int = 11, .expected_err = null },
        .{ .input = "len([1, 2, 3])", .expected_int = 3, .expected_err = null },
        .{ .input = "len(1)", .expected_int = null, .expected_err = "argument to `len` not supported" },
        // Arity check: `len` accepts exactly one argument.
        .{ .input = "len(\"one\", \"two\")", .expected_int = null, .expected_err = "wrong number of arguments to `len`" },
    };

    for (tests) |tt| {
        var arena = std.heap.ArenaAllocator.init(allocator);
        defer arena.deinit();
        const result = try testEval(arena.allocator(), tt.input);

        if (tt.expected_int) |expected| {
            // Check the active tag first so a wrong result kind is reported as
            // a test failure instead of an inactive-union-field panic.
            try std.testing.expect(result == .integer);
            try std.testing.expectEqual(expected, result.integer.value);
        }
        if (tt.expected_err) |expected| {
            try std.testing.expect(result == .err);
            try std.testing.expectEqualStrings(expected, result.err.message);
        }
    }
}

// Exercises `first`, `last`, `rest` and `push` on small integer arrays.
// All evaluations share one arena that is released when the test ends.
test "builtin array functions" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const scratch = arena.allocator();

    // first
    {
        const got = try testEval(scratch, "first([1, 2, 3])");
        try std.testing.expectEqual(@as(i64, 1), got.integer.value);
    }

    // last
    {
        const got = try testEval(scratch, "last([1, 2, 3])");
        try std.testing.expectEqual(@as(i64, 3), got.integer.value);
    }

    // rest
    {
        const got = try testEval(scratch, "rest([1, 2, 3])");
        const items = got.array.elements;
        try std.testing.expectEqual(@as(usize, 2), items.len);
        try std.testing.expectEqual(@as(i64, 2), items[0].integer.value);
        try std.testing.expectEqual(@as(i64, 3), items[1].integer.value);
    }

    // push
    {
        const got = try testEval(scratch, "push([1, 2], 3)");
        const items = got.array.elements;
        try std.testing.expectEqual(@as(usize, 3), items.len);
        try std.testing.expectEqual(@as(i64, 3), items[2].integer.value);
    }
}

Step 3: create `main.zig`
#

Create src/main.zig. The REPL reads a line, lexes, parses, evaluates, and prints the result.

const std = @import("std");
const Lexer = @import("lexer.zig").Lexer;
const Parser = @import("parser.zig").Parser;
const evaluator = @import("evaluator.zig");
const Environment = @import("environment.zig").Environment;

/// Prompt printed by the REPL before each line of input is read.
const PROMPT = ">> ";

/// ASCII-art monkey printed by the REPL ahead of the parser error listing.
const MONKEY_FACE =
    \\            __,__
    \\   .--.  .-"     "-.  .--.
    \\  / .. \/  .-. .-.  \/ .. \
    \\ | |  '|  /   Y   \  |'  | |
    \\ | \   \  \ 0 | 0 /  /   / |
    \\  \ '- ,\.-"""""""-./, -' /
    \\   ''-' /_   ^ ^   _\ '-''
    \\       |  \._   _./  |
    \\       \   \ '~' /   /
    \\        '._ '-=-' _.'
    \\           '-----'
;

/// Reads one line from stdin into a heap-allocated buffer.
///
/// Bytes are read one at a time until a '\n' is seen; the newline itself is
/// not included in the result.
///
/// Returns `null` when EOF is reached before any byte was read, or when the
/// underlying read fails. When EOF arrives after some bytes, the partial
/// line is returned. Allocation failures are returned as errors.
///
/// The caller owns the returned slice and must free it with `allocator`.
fn readLine(allocator: std.mem.Allocator) !?[]u8 {
    var line: std.ArrayList(u8) = .empty;
    errdefer line.deinit(allocator);

    while (true) {
        var buf: [1]u8 = undefined;
        const n = std.posix.read(std.posix.STDIN_FILENO, &buf) catch {
            // `return null` is not an error return, so the errdefer above
            // does not run; release any partially built line by hand.
            line.deinit(allocator);
            return null;
        };
        if (n == 0) {
            // EOF
            if (line.items.len == 0) return null;
            return try line.toOwnedSlice(allocator);
        }

        if (buf[0] == '\n') return try line.toOwnedSlice(allocator);
        try line.append(allocator, buf[0]);
    }
}

/// Entry point: runs the Monkey REPL until stdin reaches EOF.
///
/// Each iteration prints the prompt, reads a line, parses it, evaluates the
/// program against a single environment shared by all lines, and prints the
/// `inspect` text of the result. Parse, evaluation and inspect failures are
/// reported and the loop moves on to the next line.
///
/// Returns an error only when `readLine` fails to allocate.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // NOTE(review): `env` is never released here. If Environment exposes a
    // deinit, defer it so the GPA leak check on exit stays clean — confirm.
    //
    // NOTE(review): `env` outlives both `line` and the per-line arena below.
    // If values bound by `let` (or their names) still point into either one,
    // they dangle on the next iteration — confirm that the evaluator and
    // environment copy whatever they keep.
    var env = Environment.init(allocator);

    std.debug.print("Welcome to the Monkey programming language!\n", .{});
    std.debug.print("Feel free to type in commands\n", .{});

    while (true) {
        std.debug.print("{s}", .{PROMPT});

        const line = try readLine(allocator) orelse break; // EOF (Ctrl-D)
        defer allocator.free(line);

        // Use an arena for per-line allocations (parser, AST, intermediate objects)
        var arena = std.heap.ArenaAllocator.init(allocator);
        defer arena.deinit();

        var l = Lexer.init(line);
        var p = Parser.init(arena.allocator(), &l);
        const program = p.parseProgram() catch |err| {
            std.debug.print("Parse error: {}\n", .{err});
            continue;
        };

        if (p.errors.items.len > 0) {
            std.debug.print("{s}", .{MONKEY_FACE});
            std.debug.print("\nWoops! We ran into some monkey business here!\n", .{});
            std.debug.print(" parser errors:\n", .{});
            for (p.errors.items) |err| std.debug.print("\t{s}\n", .{err});
            continue;
        }

        const result = evaluator.evalProgram(arena.allocator(), program, &env) catch |err| {
            std.debug.print("Eval error: {}\n", .{err});
            continue;
        };

        // Report inspect failures like parse and eval failures instead of
        // silently printing nothing for this line.
        const output = result.inspect(arena.allocator()) catch |err| {
            std.debug.print("Inspect error: {}\n", .{err});
            continue;
        };
        std.debug.print("{s}\n", .{output});
    }

    std.debug.print("\nGoodbye!\n", .{});
}

Memory model
#

Two allocators work together. The GPA (General Purpose Allocator) is long-lived. It backs the environment and REPL line reading, and gets leak-checked on exit. The arena is short-lived, created fresh for each REPL line. The parser, AST, and intermediate evaluation objects all allocate from the arena. When the line is done, the arena frees everything at once.

The environment uses the GPA because it must survive across lines. When you type let x = 5; on one line and x on the next, x must still be bound.

Verify it works
#

Run zig build test. No output means all tests passed.

Build and run the REPL with zig build run. Try a few expressions to confirm everything works. Press Ctrl-D to exit.

zig build run

Try it out:

>> let x = 5
5
>> let add = fn(a, b) { a + b; }
fn(...) { ... }
>> add(x, 10)
15
>> len("hello")
5
>> push([1, 2], 3)
[1, 2, 3]

In chapter 5 we cover the theory behind compilers and virtual machines, and from chapter 6 onward we replace the interpreter with a bytecode compiler and stack-based VM. At this point you’ve built a working interpreter. You can stop here if your learning is complete. The next chapter begins building the compiler.

Step 1: create builtins.zig #

Step 2: update evaluator.zig #

Step 2a: update evalIdentifier #

Step 2b: add the builtin tests #

Step 3: create main.zig #

Memory model #

Verify it works #