The evaluator walks the AST and produces runtime values. We need three things: an object system to represent runtime values (integers, booleans, strings, functions, etc.), an environment for variable bindings with scope chains, and the evaluator itself for recursive tree-walking.
This chapter builds those across four files: object.zig for the runtime object system, environment.zig for variable binding environments, evaluator.zig for the tree-walking evaluator, and evaluator_test.zig for tests.
Object System (object.zig)
#
Complete object.zig
#
const std = @import("std");
const ast = @import("ast.zig");
const Environment = @import("environment.zig").Environment;
pub const ObjectType = enum {
integer,
boolean,
null,
return_value,
err,
function,
string,
builtin,
array,
hash,
};
// Individual object types.
pub const Integer = struct { value: i64 };
pub const Boolean = struct { value: bool };
pub const Null = struct {};
pub const ReturnValue = struct { value: *const Object };
pub const Error = struct { message: []const u8 };
pub const String = struct { value: []const u8 };
pub const BuiltinFn = *const fn (allocator: std.mem.Allocator, args: []const Object) Object;
pub const Builtin = struct { func: BuiltinFn };
pub const Array = struct { elements: []const Object };
pub const Hash = struct { pairs: std.AutoHashMap(HashKey, HashPair) };
pub const HashKey = struct {
type: ObjectType,
value: u64,
pub fn eql(self: HashKey, other: HashKey) bool {
return self.type == other.type and self.value == other.value;
}
};
pub const Function = struct {
parameters: []const ast.Identifier,
body: ast.BlockStatement,
env: *Environment,
};
pub const HashPair = struct {
key: Object,
value: Object,
};
/// The main Object, every runtime value in Monkey Language is one of these.
pub const Object = union(enum) {
integer: Integer,
boolean: Boolean,
null: Null,
return_value: ReturnValue,
err: Error,
function: Function,
string: String,
builtin: Builtin,
array: Array,
hash: Hash,
/// Returns the type tag of this object.
pub fn objectType(self: Object) ObjectType {
return switch (self) {
.integer => .integer,
.boolean => .boolean,
.null => .null,
.return_value => .return_value,
.err => .err,
.function => .function,
.string => .string,
.builtin => .builtin,
.array => .array,
.hash => .hash,
};
}
/// Returns a human-readable string representation of this object.
pub fn inspect(self: Object, allocator: std.mem.Allocator) ![]const u8 {
return switch (self) {
.integer => |i| try std.fmt.allocPrint(allocator, "{d}", .{i.value}),
.boolean => |b| try std.fmt.allocPrint(allocator, "{s}", .{if (b.value) "true" else "false"}),
.null => try allocator.dupe(u8, "null"),
.return_value => |rv| rv.value.inspect(allocator),
.err => |e| try std.fmt.allocPrint(allocator, "ERROR: {s}", .{e.message}),
.function => try allocator.dupe(u8, "fn(...) { ... }"),
.string => |s| try allocator.dupe(u8, s.value),
.builtin => try allocator.dupe(u8, "builtin function"),
.array => |a| {
var buf: std.ArrayList(u8) = .empty;
try buf.appendSlice(allocator, "[");
for (a.elements, 0..) |elem, idx| {
if (idx > 0) try buf.appendSlice(allocator, ", ");
const s = try elem.inspect(allocator);
try buf.appendSlice(allocator, s);
}
try buf.appendSlice(allocator, "]");
return buf.toOwnedSlice(allocator);
},
.hash => try allocator.dupe(u8, "{...}"),
};
}
/// Returns the type name as a string, useful for error messages.
pub fn typeName(self: Object) []const u8 {
return switch (self) {
.integer => "INTEGER",
.boolean => "BOOLEAN",
.null => "NULL",
.return_value => "RETURN_VALUE",
.err => "ERROR",
.function => "FUNCTION",
.string => "STRING",
.builtin => "BUILTIN",
.array => "ARRAY",
.hash => "HASH",
};
}
/// Computes a hash key for objects that can be used as hash map keys.
/// Returns null for unhashable types.
pub fn hashKey(self: Object) ?HashKey {
return switch (self) {
.integer => |i| .{ .type = .integer, .value = @bitCast(i.value) },
.boolean => |b| .{ .type = .boolean, .value = if (b.value) 1 else 0 },
.string => |s| .{ .type = .string, .value = std.hash.Wyhash.hash(0, s.value) },
else => null,
};
}
};
Environment (environment.zig)
#
The environment is a string-keyed map from variable names to objects. Environments chain: an inner scope has an outer pointer. This is how closures work. A function captures its enclosing environment.
Complete environment.zig
#
const std = @import("std");
const object = @import("object.zig");
pub const Environment = struct {
store: std.StringHashMap(object.Object),
outer: ?*Environment,
allocator: std.mem.Allocator,
/// Creates a new top-level environment.
pub fn init(allocator: std.mem.Allocator) Environment {
return .{
.store = std.StringHashMap(object.Object).init(allocator),
.outer = null,
.allocator = allocator,
};
}
/// Creates a new environment enclosed by an outer scope.
/// Used when calling functions — the function body gets a new scope
/// with access to the enclosing environment.
pub fn initEnclosed(allocator: std.mem.Allocator, outer: *Environment) Environment {
var env = init(allocator);
env.outer = outer;
return env;
}
/// Looks up a variable by name. Walks the scope chain until found.
pub fn get(self: *const Environment, name: []const u8) ?object.Object {
if (self.store.get(name)) |val| return val;
if (self.outer) |outer| return outer.get(name);
return null;
}
/// Sets a variable in the current scope.
/// The key is duped so it outlives the source (e.g., an arena-allocated AST).
pub fn set(self: *Environment, name: []const u8, val: object.Object) !void {
const owned_name = try self.allocator.dupe(u8, name);
try self.store.put(owned_name, val);
}
};
How scope chains work #
graph TD A["Global env: #123; #quot;x#quot;: 5 #125;"] -->|outer| B["Function env: #123; #quot;y#quot;: 10 #125;"] B -->|outer| C["Inner fn env: #123; #quot;z#quot;: 15 #125;"]
When looking up x from the inner function, get checks the inner env, then the function env, then the global env.
Evaluator (evaluator.zig)
#
The evaluator is a set of recursive functions that switch on AST node types and produce Objects.
Complete evaluator.zig
#
const std = @import("std");
const ast = @import("ast.zig");
const object = @import("object.zig");
const Environment = @import("environment.zig").Environment;
// const builtins = @import("builtins.zig"); // Added in Chapter 4.
// Explicit error set required because eval functions are recursive.
// (evalExpression -> evalIfExpression -> evalBlockStatement -> evalStatement -> evalExpression).
// Zig cannot infer error sets across recursive cycles.
const EvalError = std.mem.Allocator.Error;
// Singleton objects.
const TRUE = object.Object{ .boolean = .{ .value = true } };
const FALSE = object.Object{ .boolean = .{ .value = false } };
const NULL = object.Object{ .null = .{} };
fn nativeBoolToObject(value: bool) object.Object {
return if (value) TRUE else FALSE;
}
fn isError(obj: object.Object) bool {
return switch (obj) {
.err => true,
else => false,
};
}
fn newError(allocator: std.mem.Allocator, comptime fmt: []const u8, args: anytype) EvalError!object.Object {
const message = try std.fmt.allocPrint(allocator, fmt, args);
return .{ .err = .{ .message = message } };
}
/// Evaluates a complete program (list of statements).
pub fn evalProgram(allocator: std.mem.Allocator, program: ast.Program, env: *Environment) EvalError!object.Object {
var result: object.Object = NULL;
for (program.statements) |stmt| {
result = try evalStatement(allocator, stmt, env);
switch (result) {
.return_value => |rv| return rv.value.*,
.err => return result,
else => {},
}
}
return result;
}
/// Evaluates a block statement. It must bubble up to the enclosing function/program.
fn evalBlockStatement(allocator: std.mem.Allocator, block: ast.BlockStatement, env: *Environment) EvalError!object.Object {
var result: object.Object = NULL;
for (block.statements) |stmt| {
result = try evalStatement(allocator, stmt, env);
switch (result) {
.return_value, .err => return result,
else => {},
}
}
return result;
}
fn evalStatement(allocator: std.mem.Allocator, stmt: ast.Statement, env: *Environment) EvalError!object.Object {
return switch (stmt) {
.expression_statement => |es| try evalExpression(allocator, es.expression, env),
.let_statement => |ls| {
const val = try evalExpression(allocator, ls.value, env);
if (isError(val)) return val;
try env.set(ls.name, val);
return NULL;
},
.return_statement => |rs| {
const val = try evalExpression(allocator, rs.value, env);
if (isError(val)) return val;
const val_ptr = try allocator.create(object.Object);
val_ptr.* = val;
return .{ .return_value = .{ .value = val_ptr } };
},
};
}
fn evalExpression(allocator: std.mem.Allocator, expr: ast.Expression, env: *Environment) EvalError!object.Object {
return switch (expr) {
.integer_literal => |il| .{ .integer = .{ .value = il.value } },
.boolean => |b| nativeBoolToObject(b.value),
.string_literal => |s| .{ .string = .{ .value = s.value } },
.prefix => |p| {
const right = try evalExpression(allocator, p.right.*, env);
if (isError(right)) return right;
return evalPrefixExpression(allocator, p.operator, right);
},
.infix => |i| {
const left = try evalExpression(allocator, i.left.*, env);
if (isError(left)) return left;
const right = try evalExpression(allocator, i.right.*, env);
if (isError(right)) return right;
return evalInfixExpression(allocator, i.operator, left, right);
},
.if_expression => |ie| try evalIfExpression(allocator, ie, env),
.identifier => |id| evalIdentifier(allocator, id, env),
.function_literal => |fl| .{ .function = .{
.parameters = fl.parameters,
.body = fl.body,
.env = env,
} },
.call => |c| {
const func = try evalExpression(allocator, c.function.*, env);
if (isError(func)) return func;
var args: std.ArrayList(object.Object) = .empty;
for (c.arguments) |arg_expr| {
const arg = try evalExpression(allocator, arg_expr, env);
if (isError(arg)) return arg;
try args.append(allocator, arg);
}
return applyFunction(allocator, func, args.items);
},
.array_literal => |a| {
var elements: std.ArrayList(object.Object) = .empty;
for (a.elements) |elem_expr| {
const elem = try evalExpression(allocator, elem_expr, env);
if (isError(elem)) return elem;
try elements.append(allocator, elem);
}
return .{ .array = .{ .elements = try elements.toOwnedSlice(allocator) } };
},
.index_expression => |ie| {
const left = try evalExpression(allocator, ie.left.*, env);
if (isError(left)) return left;
const index = try evalExpression(allocator, ie.index.*, env);
if (isError(index)) return index;
return evalIndexExpression(allocator, left, index);
},
.hash_literal => |h| try evalHashLiteral(allocator, h, env),
};
}
// Prefix expressions.
fn evalPrefixExpression(allocator: std.mem.Allocator, operator: []const u8, right: object.Object) EvalError!object.Object {
if (std.mem.eql(u8, operator, "!")) {
return evalBangOperator(right);
} else if (std.mem.eql(u8, operator, "-")) {
return evalMinusPrefixOperator(allocator, right);
}
return newError(allocator, "unknown operator: {s}{s}", .{ operator, right.typeName() });
}
fn evalBangOperator(right: object.Object) object.Object {
return switch (right) {
.boolean => |b| nativeBoolToObject(!b.value),
.null => TRUE,
else => FALSE,
};
}
fn evalMinusPrefixOperator(allocator: std.mem.Allocator, right: object.Object) EvalError!object.Object {
return switch (right) {
.integer => |i| .{ .integer = .{ .value = -i.value } },
else => newError(allocator, "unknown operator: -{s}", .{right.typeName()}),
};
}
// Infix expressions.
fn evalInfixExpression(
allocator: std.mem.Allocator,
operator: []const u8,
left: object.Object,
right: object.Object,
) EvalError!object.Object {
// Integer arithmetic.
if (left == .integer and right == .integer) {
return evalIntegerInfixExpression(allocator, operator, left.integer.value, right.integer.value);
}
// String concatenation.
if (left == .string and right == .string) {
if (std.mem.eql(u8, operator, "+")) {
const new_val = try std.fmt.allocPrint(allocator, "{s}{s}", .{ left.string.value, right.string.value });
return .{ .string = .{ .value = new_val } };
}
return newError(allocator, "unknown operator: STRING {s} STRING", .{operator});
}
// Boolean comparisons.
if (left == .boolean and right == .boolean) {
if (std.mem.eql(u8, operator, "==")) return nativeBoolToObject(left.boolean.value == right.boolean.value);
if (std.mem.eql(u8, operator, "!=")) return nativeBoolToObject(left.boolean.value != right.boolean.value);
}
// Type mismatch.
if (@intFromEnum(left.objectType()) != @intFromEnum(right.objectType())) {
return newError(allocator, "type mismatch: {s} {s} {s}", .{ left.typeName(), operator, right.typeName() });
}
return newError(allocator, "unknown operator: {s} {s} {s}", .{ left.typeName(), operator, right.typeName() });
}
fn evalIntegerInfixExpression(allocator: std.mem.Allocator, operator: []const u8, left: i64, right: i64) EvalError!object.Object {
if (std.mem.eql(u8, operator, "+")) return .{ .integer = .{ .value = left + right } };
if (std.mem.eql(u8, operator, "-")) return .{ .integer = .{ .value = left - right } };
if (std.mem.eql(u8, operator, "*")) return .{ .integer = .{ .value = left * right } };
if (std.mem.eql(u8, operator, "/")) return .{ .integer = .{ .value = @divTrunc(left, right) } };
if (std.mem.eql(u8, operator, "<")) return nativeBoolToObject(left < right);
if (std.mem.eql(u8, operator, ">")) return nativeBoolToObject(left > right);
if (std.mem.eql(u8, operator, "==")) return nativeBoolToObject(left == right);
if (std.mem.eql(u8, operator, "!=")) return nativeBoolToObject(left != right);
return newError(allocator, "unknown operator: INTEGER {s} INTEGER", .{operator});
}
// Conditionals.
fn evalIfExpression(allocator: std.mem.Allocator, ie: ast.IfExpression, env: *Environment) EvalError!object.Object {
const condition = try evalExpression(allocator, ie.condition.*, env);
if (isError(condition)) return condition;
if (isTruthy(condition)) {
return evalBlockStatement(allocator, ie.consequence, env);
} else if (ie.alternative) |alt| {
return evalBlockStatement(allocator, alt, env);
} else {
return NULL;
}
}
fn isTruthy(obj: object.Object) bool {
return switch (obj) {
.null => false,
.boolean => |b| b.value,
else => true, // integers (including 0), strings, etc. are truthy
};
}
// Identifiers.
fn evalIdentifier(allocator: std.mem.Allocator, id: ast.Identifier, env: *Environment) EvalError!object.Object {
if (env.get(id.value)) |val| return val;
// Built-in function lookup is added in Chapter 4:
// if (builtins.lookup(id.value)) |func| return .{ .builtin = .{ .func = func } };
return newError(allocator, "identifier not found: {s}", .{id.value});
}
// Function application.
fn applyFunction(allocator: std.mem.Allocator, func: object.Object, args: []const object.Object) EvalError!object.Object {
return switch (func) {
.function => |f| {
// IMPORTANT: The enclosed environment must be heap-allocated.
// If it were stack-allocated, closures returned from this function
// would hold a dangling pointer to the destroyed stack frame.
// I filled 32GB of ram and could sear a steak on my laptop making this mistake.
const extended_env = try allocator.create(Environment);
extended_env.* = Environment.initEnclosed(allocator, f.env);
for (f.parameters, 0..) |param, i| {
try extended_env.set(param.value, args[i]);
}
const result = try evalBlockStatement(allocator, f.body, extended_env);
// Unwrap return value so it does not bubble past the function boundary.
return switch (result) {
.return_value => |rv| rv.value.*,
else => result,
};
},
.builtin => |b| b.func(allocator, args),
else => newError(allocator, "not a function: {s}", .{func.typeName()}),
};
}
// Index expressions.
fn evalIndexExpression(allocator: std.mem.Allocator, left: object.Object, index: object.Object) EvalError!object.Object {
if (left == .array and index == .integer) return evalArrayIndexExpression(left, index);
if (left == .hash) return evalHashIndexExpression(allocator, left, index);
return newError(allocator, "index operator not supported: {s}", .{left.typeName()});
}
fn evalArrayIndexExpression(array: object.Object, index: object.Object) object.Object {
const elements = array.array.elements;
const idx = index.integer.value;
const max: i64 = @as(i64, @intCast(elements.len)) - 1;
if (idx < 0 or idx > max) return NULL;
return elements[@intCast(idx)];
}
fn evalHashIndexExpression(allocator: std.mem.Allocator, hash_obj: object.Object, index: object.Object) EvalError!object.Object {
const hash = hash_obj.hash;
const key = index.hashKey() orelse
return newError(allocator, "unusable as hash key: {s}", .{index.typeName()});
const pair = hash.pairs.get(key) orelse return NULL;
return pair.value;
}
// Hash literals.
fn evalHashLiteral(allocator: std.mem.Allocator, node: ast.HashLiteral, env: *Environment) EvalError!object.Object {
var pairs = std.AutoHashMap(object.HashKey, object.HashPair).init(allocator);
for (node.pairs) |pair| {
const key = try evalExpression(allocator, pair.key, env);
if (isError(key)) return key;
const hash_key = key.hashKey() orelse
return newError(allocator, "unusable as hash key: {s}", .{key.typeName()});
const value = try evalExpression(allocator, pair.value, env);
if (isError(value)) return value;
try pairs.put(hash_key, .{ .key = key, .value = value });
}
return .{ .hash = .{ .pairs = pairs } };
}
Tests #
The evaluator tests need the lexer and parser, so they go in a separate file. Create src/evaluator_test.zig.
const std = @import("std");
const object = @import("object.zig");
const Environment = @import("environment.zig").Environment;
const evalProgram = @import("evaluator.zig").evalProgram;
// Test helper.
fn testEval(allocator: std.mem.Allocator, input: []const u8) !object.Object {
const Lexer = @import("lexer.zig").Lexer;
const Parser = @import("parser.zig").Parser;
var l = Lexer.init(input);
var p = Parser.init(allocator, &l);
const program = try p.parseProgram();
var env = Environment.init(allocator);
return try evalProgram(allocator, program, &env);
}
test "eval integer expression" {
const allocator = std.testing.allocator;
const tests = [_]struct { input: []const u8, expected: i64 }{
.{ .input = "5", .expected = 5 },
.{ .input = "10", .expected = 10 },
.{ .input = "-5", .expected = -5 },
.{ .input = "-10", .expected = -10 },
.{ .input = "5 + 5 + 5 + 5 - 10", .expected = 10 },
.{ .input = "2 * 2 * 2 * 2 * 2", .expected = 32 },
.{ .input = "-50 + 100 + -50", .expected = 0 },
.{ .input = "5 * 2 + 10", .expected = 20 },
.{ .input = "5 + 2 * 10", .expected = 25 },
.{ .input = "50 / 2 * 2 + 10", .expected = 60 },
.{ .input = "(5 + 10 * 2 + 15 / 3) * 2 + -10", .expected = 50 },
};
for (tests) |tt| {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const result = try testEval(arena.allocator(), tt.input);
try std.testing.expectEqual(tt.expected, result.integer.value);
}
}
test "eval boolean expression" {
const allocator = std.testing.allocator;
const tests = [_]struct { input: []const u8, expected: bool }{
.{ .input = "true", .expected = true },
.{ .input = "false", .expected = false },
.{ .input = "1 < 2", .expected = true },
.{ .input = "1 > 2", .expected = false },
.{ .input = "1 == 1", .expected = true },
.{ .input = "1 != 1", .expected = false },
.{ .input = "true == true", .expected = true },
.{ .input = "true != false", .expected = true },
.{ .input = "(1 < 2) == true", .expected = true },
};
for (tests) |tt| {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const result = try testEval(arena.allocator(), tt.input);
try std.testing.expectEqual(tt.expected, result.boolean.value);
}
}
test "bang operator" {
const allocator = std.testing.allocator;
const tests = [_]struct { input: []const u8, expected: bool }{
.{ .input = "!true", .expected = false },
.{ .input = "!false", .expected = true },
.{ .input = "!5", .expected = false },
.{ .input = "!!true", .expected = true },
.{ .input = "!!false", .expected = false },
.{ .input = "!!5", .expected = true },
};
for (tests) |tt| {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const result = try testEval(arena.allocator(), tt.input);
try std.testing.expectEqual(tt.expected, result.boolean.value);
}
}
test "closures" {
const allocator = std.testing.allocator;
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const input =
\\let newAdder = fn(x) {
\\ fn(y) { x + y; };
\\};
\\let addTwo = newAdder(2);
\\addTwo(3);
;
const result = try testEval(arena.allocator(), input);
try std.testing.expectEqual(@as(i64, 5), result.integer.value);
}
test "error handling" {
const allocator = std.testing.allocator;
const tests = [_]struct { input: []const u8, expected: []const u8 }{
.{ .input = "5 + true;", .expected = "type mismatch: INTEGER + BOOLEAN" },
.{ .input = "5 + true; 5;", .expected = "type mismatch: INTEGER + BOOLEAN" },
.{ .input = "-true", .expected = "unknown operator: -BOOLEAN" },
.{ .input = "foobar", .expected = "identifier not found: foobar" },
};
for (tests) |tt| {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const result = try testEval(arena.allocator(), tt.input);
try std.testing.expectEqualStrings(tt.expected, result.err.message);
}
}
Verify it works #
Make sure you uncommented the evaluator_test.zig line in build.zig, then run zig build test. No output means all tests passed.
In chapter 4 we add strings, arrays, hashes, and built-in functions to complete the interpreter.