Abstract Syntax Trees for a Custom Programming Language

Published: Thursday, March 30, 2023

Greetings, friends! This is my nineteenth post in the math expression parser series. In this tutorial, we'll learn how to build an abstract syntax tree (AST) for the custom programming language we made in the previous tutorial. Let's begin!

Revisiting our TML Tokenizer

In the previous tutorial, we created a tokenizer file named Tokenizer.js with the following contents.

js
Copied! ⭐️
const TokenTypes = {
  VAR: 'VAR',
  PRINT: 'PRINT',
  NUMBER: 'NUMBER',
  IDENTIFIER: 'IDENTIFIER',
  ADDITION: '+',
  SUBTRACTION: '-',
  MULTIPLICATION: '*',
  DIVISION: '/',
  EXPONENTIATION: '^',
  PARENTHESIS_LEFT: '(',
  PARENTHESIS_RIGHT: ')',
  SEMICOLON: ';',
  ASSIGNMENT: '='
}

const TokenSpec = [
  [/^\s+/, null],
  [/^\bvar\b/, TokenTypes.VAR],
  [/^\bprint\b/, TokenTypes.PRINT],
  [/^(?:\d+(?:\.\d*)?|\.\d+)/, TokenTypes.NUMBER],
  [/^[a-z]+/, TokenTypes.IDENTIFIER],
  [/^\+/, TokenTypes.ADDITION],
  [/^\-/, TokenTypes.SUBTRACTION],
  [/^\*/, TokenTypes.MULTIPLICATION],
  [/^\//, TokenTypes.DIVISION],
  [/^\^/, TokenTypes.EXPONENTIATION],
  [/^\(/, TokenTypes.PARENTHESIS_LEFT],
  [/^\)/, TokenTypes.PARENTHESIS_RIGHT],
  [/^;/, TokenTypes.SEMICOLON],
  [/^\=/, TokenTypes.ASSIGNMENT]
]

class Tokenizer {
  constructor(input) {
    this.input = input
    this.cursor = 0
  }

  hasMoreTokens() {
    return this.cursor < this.input.length
  }

  match(regex, inputSlice) {
    const matched = regex.exec(inputSlice)
    if (matched === null) {
      return null
    }

    this.cursor += matched[0].length
    return matched[0]
  }

  getNextToken() {
    if (!this.hasMoreTokens()) {
      return null
    }

    const inputSlice = this.input.slice(this.cursor)

    for (let [regex, type] of TokenSpec) {
      const tokenValue = this.match(regex, inputSlice)

      // No rule was matched!
      if (tokenValue === null) {
        continue
      }

      // Skip whitespace!
      if (type === null) {
        return this.getNextToken()
      }

      return {
        type,
        value: tokenValue
      }
    }

    throw new SyntaxError(`Unexpected token: "${inputSlice[0]}"`)
  }

  printAllTokens() {
    let token
    while ((token = this.getNextToken())) {
      console.log(token)
    }
  }
}

module.exports = {
  TokenTypes,
  Tokenizer
}

Revisiting our TML Parser

In the previous tutorial, we created a Pratt parser file named Parser.js with the following contents.

js
Copied! ⭐️
const { TokenTypes, Tokenizer } = require('./Tokenizer')

class SymbolTable {
  constructor() {
    this.symbolTable = {}
  }

  set(name, value) {
    this.symbolTable[name] = value
  }

  get(name) {
    if (this.has(name)) {
      return this.symbolTable[name]
    }
  }

  has(name) {
    if (this.symbolTable.hasOwnProperty(name)) {
      return true
    } else throw new Error(`${name} has not been declared.`)
  }
}

class Parser {
  parse(input) {
    this.input = input
    this.tokenizer = new Tokenizer(input)
    this.lookahead = this.tokenizer.getNextToken()
    this.operators = {
      '^': 5,
      unary: 4,
      '*': 3,
      '/': 3,
      '+': 2,
      '-': 2
    }
    this.symbolTable = new SymbolTable()

    return this.Program()
  }

  eat(tokenType) {
    const token = this.lookahead

    if (token == null) {
      throw new SyntaxError(`Unexpected end of input, expected "${tokenType}"`)
    }

    if (token.type !== tokenType) {
      throw new SyntaxError(
        `Unexpected token: "${token.value}", expected "${tokenType}"`
      )
    }

    this.lookahead = this.tokenizer.getNextToken()

    return token
  }

  getPrecedence(token) {
    if (token === 'unary') {
      return this.operators.unary
    }

    if (token && token.type != TokenTypes.PARENTHESIS_RIGHT) {
      return this.operators[token.value]
    }

    return 0
  }

  trig(id, value) {
    switch (id) {
      case 'sin':
        return Math.sin(value)
      case 'cos':
        return Math.cos(value)
      case 'tan':
        return Math.tan(value)
      default:
        throw new Error(`Invalid operation: ${id}`)
    }
  }

  /**
   * Program
   *    = StatementList
   */
  Program() {
    return this.StatementList()
  }

  /**
   * StatementList
   *    = Statement+
   */
  StatementList() {
    const statementList = [this.Statement()]

    while (this.lookahead !== null) {
      statementList.push(this.Statement())
    }

    return statementList
  }

  /**
   * Statement
   *    = VariableStatement
   *    / PrintStatement
   *    / ExpressionStatement
   */
  Statement() {
    if (this.lookahead.type === TokenTypes.VAR) {
      return this.VariableStatement()
    }

    if (this.lookahead.type === TokenTypes.PRINT) {
      return this.PrintStatement()
    }

    return this.ExpressionStatement()
  }

  /**
   * VariableStatement
   *    = "var" IDENTIFIER "=" Expression ";"
   */
  VariableStatement() {
    this.eat(TokenTypes.VAR)
    const name = this.eat(TokenTypes.IDENTIFIER).value
    this.eat(TokenTypes.ASSIGNMENT)
    const value = this.Expression()
    this.eat(TokenTypes.SEMICOLON)

    this.symbolTable.set(name, value)
  }

  /**
   * PrintStatement
   *    = "print" ParenthesizedExpression ";"
   */
  PrintStatement() {
    this.eat(TokenTypes.PRINT)
    const expression = this.ParenthesizedExpression()
    this.eat(TokenTypes.SEMICOLON)
    console.log(expression)
  }

  /**
   * ExpressionStatement
   *    = Expression ";"
   */
  ExpressionStatement() {
    const expression = this.Expression()
    this.eat(TokenTypes.SEMICOLON)
    return expression
  }

  /**
   * Expression
   *    = Prefix (Infix)*
   */
  Expression(prec = 0) {
    let left = this.Prefix()

    while (prec < this.getPrecedence(this.lookahead)) {
      left = this.Infix(left, this.lookahead?.type)
    }

    return left
  }

  /**
   * Prefix
   *    = ParenthesizedExpression
   *    / UnaryExpression
   *    / VariableOrFunctionExpression
   *    / NUMBER
   */
  Prefix() {
    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.ParenthesizedExpression()
    }

    if (this.lookahead.type === TokenTypes.SUBTRACTION) {
      return this.UnaryExpression()
    }

    if (this.lookahead.type === TokenTypes.IDENTIFIER) {
      return this.VariableOrFunctionExpression()
    }

    const token = this.eat(TokenTypes.NUMBER)
    return Number(token.value)
  }

  /**
   * Infix
   *    = ("+" / "-" / "*" / "/" / "^") Expression
   */
  Infix(left, operatorType) {
    let token = this.eat(operatorType)
    let newPrec = this.operators[token.value]
    switch (token.type) {
      case TokenTypes.ADDITION:
        return left + this.Expression(newPrec)
      case TokenTypes.SUBTRACTION:
        return left - this.Expression(newPrec)
      case TokenTypes.MULTIPLICATION:
        return left * this.Expression(newPrec)
      case TokenTypes.DIVISION:
        return left / this.Expression(newPrec)
      case TokenTypes.EXPONENTIATION:
        return left ** this.Expression(newPrec - 1)
    }
  }

  /**
   * ParenthesizedExpression
   *    = "(" Expression ")"
   */
  ParenthesizedExpression() {
    this.eat(TokenTypes.PARENTHESIS_LEFT)
    const expression = this.Expression()
    this.eat(TokenTypes.PARENTHESIS_RIGHT)
    return expression
  }

  /**
   * UnaryExpression
   *    = "-" Expression
   */
  UnaryExpression() {
    this.eat(TokenTypes.SUBTRACTION)
    return -this.Expression(this.getPrecedence('unary'))
  }

  /**
   * VariableOrFunctionExpression
   *    = FunctionExpression
   *    / Variable
   */
  VariableOrFunctionExpression() {
    const id = this.eat(TokenTypes.IDENTIFIER).value

    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.FunctionExpression(id)
    }

    return this.Variable(id)
  }

  /**
   * Variable
   *    = IDENTIFIER
   */
  Variable(id) {
    return this.symbolTable.get(id)
  }

  /**
   * FunctionExpression
   *    = IDENTIFIER ParenthesizedExpression
   */
  FunctionExpression(id) {
    const expression = this.ParenthesizedExpression()
    return this.trig(id, expression)
  }
}

module.exports = { Parser }

We've been using this Pratt parser to evaluate statements and math expressions on the fly. Below is an example of how we used this parser to evaluate statements containing math expressions.

js
Copied! ⭐️
const parser = new Parser()
parser.parse('var x = 1 + 2 * 3; print(x);') // 7

Abstract Syntax Tree Structure

Now, it's time to return AST nodes instead performing side effects such as printing to the console or setting variables. The AST we build in this tutorial will be a bit different from the ASTs we've built in the previous tutorials. Since the TML language has statements instead of only math expressions, we need to create a few new AST nodes. We can still reuse the AST nodes from the previous tutorials when dealing with math expressions.

Let's look at a preview of what our AST might look like by the end of this tutorial. If we had the statement, var x = 1 + 2 * 3; print(x);, our AST will look similar to the following:

js
Copied! ⭐️
{
  type: 'Program',
  body: [
    {
      type: 'VariableStatement',
      name: 'x',
      value: {
        type: 'BinaryExpression',
        operator: '+',
        left: { type: 'Number', value: 1 },
        right: {
          type: 'BinaryExpression',
          operator: '*',
          left: { type: 'Number', value: 2 },
          right: { type: 'Number', value: 3 }
        }
      }
    },
    { type: 'PrintStatement', value: { type: 'Variable', name: 'x' } }
  ]
}

The AST will start with a Program node at the top of the tree. Then, it'll contain a body property which represents a "statement list". Notice that there are two statements in the list: VariableStatement and PrintStatement. The VariableStatement contains an expression, and the PrintStatement contains a variable (for retrieval from a symbol table).

Let's look at all the different kinds of AST nodes we'll create in this tutorial.

Program AST Node

When we parse a TML program, it should begin with the Program AST node which will have the following structure:

js
Copied! ⭐️
{
  type: 'Program',
  body: [ ]
}

The body will contain a list of statements, given by the StatementList method. The Program method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
Program() {
  return {
    type: 'Program',
    body: this.StatementList()
  }
}

VariableStatement AST Node

The VariableStatement AST node will contain the name of a variable and the value stored in that variable.

js
Copied! ⭐️
{
  type: 'VariableStatement',
  name: '',
  value: { }
}

The VariableStatement method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
VariableStatement() {
  this.eat(TokenTypes.VAR)
  const name = this.eat(TokenTypes.IDENTIFIER).value
  this.eat(TokenTypes.ASSIGNMENT)
  const value = this.Expression()
  this.eat(TokenTypes.SEMICOLON)

  return {
    type: 'VariableStatement',
    name,
    value
  }
}

PrintStatement AST Node

The PrintStatement AST node will contain the value that we want to print to the console. This value will contain an expression or numeric literal.

js
Copied! ⭐️
{
  type: 'PrintStatement',
  value: { }
}

The PrintStatement method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
PrintStatement() {
  this.eat(TokenTypes.PRINT)
  const expression = this.ParenthesizedExpression()
  this.eat(TokenTypes.SEMICOLON)

  return {
    type: 'PrintStatement',
    value: expression
  }
}

ExpressionStatement AST Node

The ExpressionStatement AST node will contain an expression AST inside the value property.

js
Copied! ⭐️
{
  type: 'ExpressionStatement',
  value: { }
}

The ExpressionStatement method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
ExpressionStatement() {
  const expression = this.Expression()
  this.eat(TokenTypes.SEMICOLON)

  return {
    type: 'ExpressionStatement',
    value: expression
  }
}

Variable AST Node

The Variable AST node will contain the name of a variable stored in a symbol table. The value stored in this variable will be retrieved when used in an expression such as 1 + 2 * x.

js
Copied! ⭐️
{
  type: 'Variable',
  name: '',
}

The Variable method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
Variable(id) {
  return {
    type: 'Variable',
    name: id
  }
}

BinaryExpression AST Node

The BinaryExpression AST node will contain a binary expression between two expressions or numeric literals which will be on the left side of an operator and right side of an operator. It will contain the type of operation including addition +, subtraction -, multiplication *, division /, and exponentiation ^.

js
Copied! ⭐️
{
  type: 'BinaryExpression',
  operator: ''
  left: { },
  right: { }
}

The Infix method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
Infix(left, operatorType) {
  let token = this.eat(operatorType)
  let newPrec = this.operators[token.value]

  return {
    type: 'BinaryExpression',
    operator: token.value,
    left,
    right: this.Expression(
      token.type === TokenTypes.EXPONENTIATION ? newPrec - 1 : newPrec
    )
  }
}

UnaryExpression AST Node

The UnaryExpression AST node will contain an expression AST inside the value property. We do not store the negative version of the expression inside our AST because an AST evaluator will detect the AST node type as UnaryExpression and perform a negation for us.

js
Copied! ⭐️
{
  type: 'UnaryExpression',
  value: { }
}

The UnaryExpression method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
UnaryExpression() {
  this.eat(TokenTypes.SUBTRACTION)

  return {
    type: 'UnaryExpression',
    value: this.Expression(this.getPrecedence('unary'))
  }
}

Function AST Node

The Function AST node will contain the name of our function such as sin, cos, and tan. The value will be an expression or numeric value that goes inside the function argument.

js
Copied! ⭐️
{
  type: 'Function',
  name: '',
  value: { }
}

The FunctionExpression method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
FunctionExpression(id) {
  const expression = this.ParenthesizedExpression()

  return {
    type: 'Function',
    name: id,
    value: expression
  }
}

Number AST Node

Finally, our Number AST node will contain a numeric literal. Since our tokenizer is setup to handle decimal numbers, the value can contain an integer or decimal value.

js
Copied! ⭐️
{
  type: 'Number',
  value: 1
}

The Prefix method inside our parser should be rewritten to return an AST node.

js
Copied! ⭐️
Prefix() {
  if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
    return this.ParenthesizedExpression()
  }

  if (this.lookahead.type === TokenTypes.SUBTRACTION) {
    return this.UnaryExpression()
  }

  if (this.lookahead.type === TokenTypes.IDENTIFIER) {
    return this.VariableOrFunctionExpression()
  }

  const token = this.eat(TokenTypes.NUMBER)

  return {
    type: 'Number',
    value: Number(token.value)
  }
}

Finished Code for the TML AST Generator

Now that we've seen all the different types of AST nodes we'll use to build our AST, let's look at the fully completed TML parser that has all the adjustments we made to each method. Create a new file named Parser-AST.js and insert the following contents:

js
Copied! ⭐️
const { TokenTypes, Tokenizer } = require('./Tokenizer')

class Parser {
  parse(input) {
    this.input = input
    this.tokenizer = new Tokenizer(input)
    this.lookahead = this.tokenizer.getNextToken()
    this.operators = {
      '^': 5,
      unary: 4,
      '*': 3,
      '/': 3,
      '+': 2,
      '-': 2
    }

    return this.Program()
  }

  eat(tokenType) {
    const token = this.lookahead

    if (token == null) {
      throw new SyntaxError(`Unexpected end of input, expected "${tokenType}"`)
    }

    if (token.type !== tokenType) {
      throw new SyntaxError(
        `Unexpected token: "${token.value}", expected "${tokenType}"`
      )
    }

    this.lookahead = this.tokenizer.getNextToken()

    return token
  }

  getPrecedence(token) {
    if (token === 'unary') {
      return this.operators.unary
    }

    if (token && token.type != TokenTypes.PARENTHESIS_RIGHT) {
      return this.operators[token.value]
    }

    return 0
  }

  /**
   * Program
   *    = StatementList
   */
  Program() {
    return {
      type: 'Program',
      body: this.StatementList()
    }
  }

  /**
   * StatementList
   *    = Statement+
   */
  StatementList() {
    const statementList = [this.Statement()]

    while (this.lookahead !== null) {
      statementList.push(this.Statement())
    }

    return statementList
  }

  /**
   * Statement
   *    = VariableStatement
   *    / PrintStatement
   *    / ExpressionStatement
   */
  Statement() {
    if (this.lookahead.type === TokenTypes.VAR) {
      return this.VariableStatement()
    }

    if (this.lookahead.type === TokenTypes.PRINT) {
      return this.PrintStatement()
    }

    return this.ExpressionStatement()
  }

  /**
   * VariableStatement
   *    = "var" IDENTIFIER "=" Expression ";"
   */
  VariableStatement() {
    this.eat(TokenTypes.VAR)
    const name = this.eat(TokenTypes.IDENTIFIER).value
    this.eat(TokenTypes.ASSIGNMENT)
    const value = this.Expression()
    this.eat(TokenTypes.SEMICOLON)

    return {
      type: 'VariableStatement',
      name,
      value
    }
  }

  /**
   * PrintStatement
   *    = "print" ParenthesizedExpression ";"
   */
  PrintStatement() {
    this.eat(TokenTypes.PRINT)
    const expression = this.ParenthesizedExpression()
    this.eat(TokenTypes.SEMICOLON)

    return {
      type: 'PrintStatement',
      value: expression
    }
  }

  /**
   * ExpressionStatement
   *    = Expression ";"
   */
  ExpressionStatement() {
    const expression = this.Expression()
    this.eat(TokenTypes.SEMICOLON)

    return {
      type: 'ExpressionStatement',
      value: expression
    }
  }

  /**
   * Expression
   *    = Prefix (Infix)*
   */
  Expression(prec = 0) {
    let left = this.Prefix()

    while (prec < this.getPrecedence(this.lookahead)) {
      left = this.Infix(left, this.lookahead?.type)
    }

    return left
  }

  /**
   * Prefix
   *    = ParenthesizedExpression
   *    / UnaryExpression
   *    / VariableOrFunctionExpression
   *    / NUMBER
   */
  Prefix() {
    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.ParenthesizedExpression()
    }

    if (this.lookahead.type === TokenTypes.SUBTRACTION) {
      return this.UnaryExpression()
    }

    if (this.lookahead.type === TokenTypes.IDENTIFIER) {
      return this.VariableOrFunctionExpression()
    }

    const token = this.eat(TokenTypes.NUMBER)

    return {
      type: 'Number',
      value: Number(token.value)
    }
  }

  /**
   * Infix
   *    = ("+" / "-" / "*" / "/" / "^") Expression
   */
  Infix(left, operatorType) {
    let token = this.eat(operatorType)
    let newPrec = this.operators[token.value]

    return {
      type: 'BinaryExpression',
      operator: token.value,
      left,
      right: this.Expression(
        token.type === TokenTypes.EXPONENTIATION ? newPrec - 1 : newPrec
      )
    }
  }

  /**
   * ParenthesizedExpression
   *    = "(" Expression ")"
   */
  ParenthesizedExpression() {
    this.eat(TokenTypes.PARENTHESIS_LEFT)
    const expression = this.Expression()
    this.eat(TokenTypes.PARENTHESIS_RIGHT)
    return expression
  }

  /**
   * UnaryExpression
   *    = "-" Expression
   */
  UnaryExpression() {
    this.eat(TokenTypes.SUBTRACTION)

    return {
      type: 'UnaryExpression',
      value: this.Expression(this.getPrecedence('unary'))
    }
  }

  /**
   * VariableOrFunctionExpression
   *    = FunctionExpression
   *    / Variable
   */
  VariableOrFunctionExpression() {
    const id = this.eat(TokenTypes.IDENTIFIER).value

    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.FunctionExpression(id)
    }

    return this.Variable(id)
  }

  /**
   * Variable
   *    = IDENTIFIER
   */
  Variable(id) {
    return {
      type: 'Variable',
      name: id
    }
  }

  /**
   * FunctionExpression
   *    = IDENTIFIER ParenthesizedExpression
   */
  FunctionExpression(id) {
    const expression = this.ParenthesizedExpression()

    return {
      type: 'Function',
      name: id,
      value: expression
    }
  }
}

module.exports = { Parser }

Overview of the Parser Modifications

Notice all the changes we made to our parser. Instead of returning a single value in most of our methods, we return an AST node. We no longer need the SymbolTable class because we'll handle variable storage and retrieval when we build an AST evaluator later. We removed the trig method as well because we'll handle computations inside the AST evaluator.

We can import this parser into a file named ast-example.js with the following contents:

js
Copied! ⭐️
const { Parser } = require('./Parser-AST')

const parser = new Parser()
const ast = parser.parse('var x = 1 + 2 * 3; print(x);')
console.dir(ast, { depth: null }) // log nested objects

We can execute this code using the following command:

shell
Copied! ⭐️
node ast-example

We should see the following AST displayed in the terminal.

js
Copied! ⭐️
{
  type: 'Program',
  body: [
    {
      type: 'VariableStatement',
      name: 'x',
      value: {
        type: 'BinaryExpression',
        operator: '+',
        left: { type: 'Number', value: 1 },
        right: {
          type: 'BinaryExpression',
          operator: '*',
          left: { type: 'Number', value: 2 },
          right: { type: 'Number', value: 3 }
        }
      }
    },
    { type: 'PrintStatement', value: { type: 'Variable', name: 'x' } }
  ]
}

Evaluating the AST

Since our AST has a different structure from the ones we created in previous tutorials, we need to build a new AST evaluator. We'll create a new file called NodeVisitor.js and add the following contents.

js
Copied! ⭐️
class SymbolTable {
  constructor() {
    this.symbolTable = {}
  }

  set(name, value) {
    this.symbolTable[name] = value
  }

  get(name) {
    if (this.has(name)) {
      return this.symbolTable[name]
    }
  }

  has(name) {
    if (this.symbolTable.hasOwnProperty(name)) {
      return true
    } else throw new Error(`${name} has not been declared.`)
  }
}

class NodeVisitor {
  constructor() {
    this.symbolTable = new SymbolTable()
  }

  visit(node) {
    switch (node.type) {
      case 'Program':
        return this.visitProgram(node)
      case 'VariableStatement':
        return this.visitVariableStatement(node)
      case 'PrintStatement':
        return this.visitPrintStatement(node)
      case 'ExpressionStatement':
        return this.visitExpressionStatement(node)
      case 'Variable':
        return this.visitVariable(node)
      case 'BinaryExpression':
        return this.visitBinaryExpression(node)
      case 'UnaryExpression':
        return this.visitUnaryExpression(node)
      case 'Function':
        return this.visitFunction(node)
      case 'Number':
        return this.visitNumber(node)
    }
  }

  visitProgram(node) {
    const results = []

    for (const statementNode of node.body) {
      results.push(this.visit(statementNode))
    }

    return results
  }

  visitVariableStatement(node) {
    this.symbolTable.set(node.name, this.visit(node.value))
  }

  visitPrintStatement(node) {
    console.log(node.value)
  }

  visitExpressionStatement(node) {
    return this.visit(node.value)
  }

  visitVariable(node) {
    return this.symbolTable.get(node.name)
  }

  visitBinaryExpression(node) {
    switch (node.operator) {
      case '+':
        return this.visit(node.left) + this.visit(node.right)
      case '-':
        return this.visit(node.left) - this.visit(node.right)
      case '*':
        return this.visit(node.left) * this.visit(node.right)
      case '/':
        return this.visit(node.left) / this.visit(node.right)
      case '^':
        return this.visit(node.left) ** this.visit(node.right)
      default:
        throw new Error(`Invalid operation: ${node.operator}`)
    }
  }

  visitUnaryExpression(node) {
    return -this.visit(node.value)
  }

  visitFunction(node) {
    switch (node.name) {
      case 'sin':
        return Math.sin(this.visit(node.value))
      case 'cos':
        return Math.cos(this.visit(node.value))
      case 'tan':
        return Math.tan(this.visit(node.value))
      default:
        throw new Error(`Invalid function: ${node.name}`)
    }
  }

  visitNumber(node) {
    return node.value
  }
}

module.exports = { NodeVisitor }

Notice how we added the SymbolTable class into our AST evaluator. Since our new TML parser returns an AST, it doesn't have to worry about performing side effects or actions. Those jobs will be taken care by the AST evaluator instead.

Remember, the purpose of an AST is to store a program in an intermediate representation (IR) format and defer execution until later. This lets the user decide how they want to use the AST. The user could decide to evaluate the AST using a different language like Rust if they wanted to even though the AST is built using JavaScript 😅

Testing the AST

We can test our AST evaluator against a variety of statements using the tests.js file we created in the previous tutorial as a base.

Inside the tests.js file, import the Parser class from the Parser-AST.js file and the NodeVisitor class from the NodeVisitor.js file.

Then, we'll update the tests to run against an evaluated AST instead of using the parser from the previous tutorial that directly returned an array of values.

js
Copied! ⭐️
const assert = require('assert').strict
const { Parser } = require('./Parser-AST')
const { NodeVisitor } = require('./NodeVisitor')

const mathTests = [
  // Expression statements
  ['1;', [1]],
  [' 2 ;', [2]],
  ['1 + 2;', [3]],
  [' 1 + 2 ;', [3]],
  ['1 + 2 * 3;', [7]],
  ['(1 + 2) * 3;', [9]],
  ['5 - 2;', [3]],
  ['5 - 2 - 1;', [2]],
  ['12 / 2 / 3;', [2]],
  ['2 ^ 3 + 1;', [9]],
  ['-2 ^ 2;', [-4]],
  ['(-2) ^ 2;', [4]],
  ['-2 ^ 2 + 1;', [-3]],
  ['cos(0) + 3 * -4 / -2 ^ 2;', [4]],

  // // Variable Statements
  ['var x = 1; x + 1;', [undefined, 2]],
  ['var x = (1 + 2) * 3; x + 1;', [undefined, 10]],
  ['var x = 1; var y = 2; x + y;', [undefined, undefined, 3]]
]

const parser = new Parser()
const visitor = new NodeVisitor()
let result

for (const [expression, expected] of mathTests) {
  try {
    const ast = parser.parse(expression)
    result = visitor.visit(ast)
    assert.deepEqual(result, expected)
  } catch (err) {
    const lines = '-'.repeat(process.stdout.columns)
    console.log(lines)
    console.log(`Expression failed: "${expression}"`)
    console.log(`Expected result: ${expected}`)
    console.log(`Actual result: ${result}`)
    console.log(lines)
    throw err
  }
}

console.log('All tests passed! 🎉')

We can execute this file using the following command:

shell
Copied! ⭐️
node tests

If everything goes well, then all the tests should pass, and you should give yourself a pat on the back. Well done! You have now created a parser for a small programming language that can output an AST! You also built an AST interpreter/evaluator to go along with it! Amazing! 🤩

Conclusion

In this tutorial, we learned how to make a parser that returned an abstract syntax tree representation of a program written in a custom programming language called "Tiny Math Language", or TML for short.

In the previous tutorial, we evaluated statements on the fly, but now we generate an AST. Remember, an AST gives the user freedom to choose how they would like to evaluate a math expression and lets us defer execution until later.

I hope you enjoyed my two-part "bonus" tutorial series on creating a small programming language! We've finally reached the end of this 20-part math expression parser series. If this series has helped you in any way, please consider donating to my cause! I would be eternally grateful 🙇 💚 💚

The next article is the conclusion of this tutorial series, Part 20. We'll review what we learned and discuss how our new knowledge of parsing can help us in our software development career! See you there! ⭐