Abstract Syntax Trees with a Recursive Descent Parser

Published: Friday, March 24, 2023
Updated: Tuesday, March 28, 2023

Greetings, friends! This is my fifteenth post in the math expression parser series. In this tutorial, we'll make adjustments to the recursive descent parser we made in the previous tutorial, so that we can generate abstract syntax tree (AST) representations of math expressions. Let's begin!

Revisiting our Recursive Descent Parser

Let's revisit the recursive descent parser we developed in the previous tutorial. First, we created a tokenizer in a file named Tokenizer.js with the following contents.

js
Copied! ⭐️
const TokenTypes = {
  NUMBER: 'NUMBER',
  IDENTIFIER: 'IDENTIFIER',
  ADDITION: '+',
  SUBTRACTION: '-',
  MULTIPLICATION: '*',
  DIVISION: '/',
  EXPONENTIATION: '^',
  PARENTHESIS_LEFT: '(',
  PARENTHESIS_RIGHT: ')'
};

const TokenSpec = [
  [/^\s+/, null],
  [/^(?:\d+(?:\.\d*)?|\.\d+)/, TokenTypes.NUMBER],
  [/^[a-z]+/, TokenTypes.IDENTIFIER],
  [/^\+/, TokenTypes.ADDITION],
  [/^\-/, TokenTypes.SUBTRACTION],
  [/^\*/, TokenTypes.MULTIPLICATION],
  [/^\//, TokenTypes.DIVISION],
  [/^\^/, TokenTypes.EXPONENTIATION],
  [/^\(/, TokenTypes.PARENTHESIS_LEFT],
  [/^\)/, TokenTypes.PARENTHESIS_RIGHT]
];

class Tokenizer {
  constructor(input) {
    this.input = input;
    this.cursor = 0;
  }

  hasMoreTokens() {
    return this.cursor < this.input.length;
  }

  match(regex, inputSlice) {
    const matched = regex.exec(inputSlice);
    if (matched === null) {
      return null;
    }

    this.cursor += matched[0].length;
    return matched[0];
  }

  getNextToken() {
    if (!this.hasMoreTokens()) {
      return null;
    }

    const inputSlice = this.input.slice(this.cursor);

    for (let [regex, type] of TokenSpec) {
      const tokenValue = this.match(regex, inputSlice);

      if (tokenValue === null) {
        continue;
      }

      if (type === null) {
        return this.getNextToken();
      }

      return {
        type,
        value: tokenValue,
      };
    }

    throw new SyntaxError(`Unexpected token: "${inputSlice[0]}"`);
  }
}

module.exports = {
  TokenTypes,
  Tokenizer
}

Then, we used this tokenizer inside our recursive descent parser. We had created a file called Parser.js and added a Parser class that contained methods for evaluating math expressions. By the end of the previous tutorial, our parser had the following contents.

js
Copied! ⭐️
const { TokenTypes, Tokenizer } = require('./Tokenizer')

class Parser {
  parse(input) {
    this.input = input
    this.tokenizer = new Tokenizer(input)
    this.lookahead = this.tokenizer.getNextToken()

    return this.Expression()
  }

  eat(tokenType) {
    const token = this.lookahead

    if (token == null) {
      throw new SyntaxError(`Unexpected end of input, expected "${tokenType}"`)
    }

    if (token.type !== tokenType) {
      throw new SyntaxError(
        `Unexpected token: "${token.value}", expected "${tokenType}"`
      )
    }

    this.lookahead = this.tokenizer.getNextToken()

    return token
  }

  trig(id, value) {
    switch (id) {
      case 'sin':
        return Math.sin(value)
      case 'cos':
        return Math.cos(value)
      case 'tan':
        return Math.tan(value)
      default:
        throw new Error(`Invalid operation: ${id}`)
    }
  }

  BinaryExpression(leftRule, rightRule, operatorType1, operatorType2) {
    let left = leftRule()

    while (
      this.lookahead &&
      (this.lookahead.type === operatorType1 ||
        this.lookahead.type === operatorType2)
    ) {
      const operator = this.eat(this.lookahead.type).type
      switch (operator) {
        case TokenTypes.ADDITION:
          left = left + rightRule()
          break
        case TokenTypes.SUBTRACTION:
          left = left - rightRule()
          break
        case TokenTypes.MULTIPLICATION:
          left = left * rightRule()
          break
        case TokenTypes.DIVISION:
          left = left / rightRule()
          break
        case TokenTypes.EXPONENTIATION:
          left = left ** rightRule()
          break
      }
    }

    return left
  }

  /**
   * Expression
   *    = Term (("+" / "-") Term)*
   */
  Expression() {
    return this.BinaryExpression(
      () => this.Term(),
      () => this.Term(),
      TokenTypes.ADDITION,
      TokenTypes.SUBTRACTION
    )
  }

  /**
   * Term
   *    = Factor (("*" / "/") Factor)*
   */
  Term() {
    return this.BinaryExpression(
      () => this.Factor(),
      () => this.Factor(),
      TokenTypes.MULTIPLICATION,
      TokenTypes.DIVISION
    )
  }

  /**
   * Factor
   *    = Primary ("^" Factor)*
   */
  Factor() {
    return this.BinaryExpression(
      () => this.Primary(),
      () => this.Factor(),
      TokenTypes.EXPONENTIATION
    )
  }

  /**
   * Primary
   *    = ParenthesizedExpression
   *    / UnaryExpression
   *    / FunctionExpression
   *    / NUMBER
   */
  Primary() {
    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.ParenthesizedExpression()
    }

    if (this.lookahead.type === TokenTypes.SUBTRACTION) {
      return this.UnaryExpression()
    }

    if (this.lookahead.type === TokenTypes.IDENTIFIER) {
      return this.FunctionExpression()
    }

    const token = this.eat(TokenTypes.NUMBER)
    return Number(token.value)
  }

  /**
   * ParenthesizedExpression
   *    = "(" Expression ")"
   */
  ParenthesizedExpression() {
    this.eat(TokenTypes.PARENTHESIS_LEFT)
    const expression = this.Expression()
    this.eat(TokenTypes.PARENTHESIS_RIGHT)

    return expression
  }

  /**
   * UnaryExpression
   *    = "-" Factor
   */
  UnaryExpression() {
    this.eat(TokenTypes.SUBTRACTION)
    return -this.Factor()
  }

  /**
   * FunctionExpression
   *    = IDENTIFIER ParenthesizedExpression
   */
  FunctionExpression() {
    const id = this.eat(TokenTypes.IDENTIFIER).value
    return this.trig(id, this.ParenthesizedExpression())
  }
}

Below is an example of how we used our recursive descent parser to evaluate math expressions.

js
Copied! ⭐️
const parser = new Parser()
console.log(parser.parse('1 + 2 * 3')) // 7

Building Abstract Syntax Trees

Now, it's time to change the return value of some of the methods in our recursive descent parser such that they return an AST node instead of a number. Our AST will be identical to the ones we've constructed in Part 8, Part 11, and Part 13 of this tutorial series.

Create a new file named Parser-AST.js with the following contents:

js
Copied! ⭐️
const { TokenTypes, Tokenizer } = require('./Tokenizer')

class Parser {
  parse(input) {
    this.input = input
    this.tokenizer = new Tokenizer(input)
    this.lookahead = this.tokenizer.getNextToken()

    return this.Expression()
  }

  eat(tokenType) {
    const token = this.lookahead

    if (token == null) {
      throw new SyntaxError(`Unexpected end of input, expected "${tokenType}"`)
    }

    if (token.type !== tokenType) {
      throw new SyntaxError(
        `Unexpected token: "${token.value}", expected "${tokenType}"`
      )
    }

    this.lookahead = this.tokenizer.getNextToken()

    return token
  }

  BinaryExpression(leftRule, rightRule, operatorType1, operatorType2) {
    let left = leftRule()

    while (
      this.lookahead &&
      (this.lookahead.type === operatorType1 ||
        this.lookahead.type === operatorType2)
    ) {
      const operator = this.eat(this.lookahead.type).value
      left = {
        type: 'BinaryExpression',
        operator,
        left,
        right: rightRule()
      }
    }

    return left
  }

  /**
   * Expression
   *    = Term (("+" / "-") Term)*
   */
  Expression() {
    return this.BinaryExpression(
      () => this.Term(),
      () => this.Term(),
      TokenTypes.ADDITION,
      TokenTypes.SUBTRACTION
    )
  }

  /**
   * Term
   *    = Factor (("*" / "/") Factor)*
   */
  Term() {
    return this.BinaryExpression(
      () => this.Factor(),
      () => this.Factor(),
      TokenTypes.MULTIPLICATION,
      TokenTypes.DIVISION
    )
  }

  /**
   * Factor
   *    = Primary ("^" Factor)*
   */
  Factor() {
    return this.BinaryExpression(
      () => this.Primary(),
      () => this.Factor(),
      TokenTypes.EXPONENTIATION
    )
  }

  /**
   * Primary
   *    = ParenthesizedExpression
   *    / UnaryExpression
   *    / FunctionExpression
   *    / NUMBER
   */
  Primary() {
    if (this.lookahead.type === TokenTypes.PARENTHESIS_LEFT) {
      return this.ParenthesizedExpression()
    }

    if (this.lookahead.type === TokenTypes.SUBTRACTION) {
      return this.UnaryExpression()
    }

    if (this.lookahead.type === TokenTypes.IDENTIFIER) {
      return this.FunctionExpression()
    }

    const token = this.eat(TokenTypes.NUMBER)
    return {
      type: 'Number',
      value: Number(token.value)
    }
  }

  /**
   * ParenthesizedExpression
   *    = "(" Expression ")"
   */
  ParenthesizedExpression() {
    this.eat(TokenTypes.PARENTHESIS_LEFT)
    const expression = this.Expression()
    this.eat(TokenTypes.PARENTHESIS_RIGHT)

    return expression
  }

  /**
   * UnaryExpression
   *    = "-" Factor
   */
  UnaryExpression() {
    this.eat(TokenTypes.SUBTRACTION)
    return {
      type: 'UnaryExpression',
      value: this.Factor()
    }
  }

  /**
   * FunctionExpression
   *    = IDENTIFIER ParenthesizedExpression
   */
  FunctionExpression() {
    const token = this.eat(TokenTypes.IDENTIFIER)
    return {
      type: 'Function',
      name: token.value,
      value: this.ParenthesizedExpression()
    }
  }
}

module.exports = { Parser }

Notice all the changes we made to our parser. Since we drastically cleaned up common code in the previous tutorial using the BinaryExpression method, it's super easy to return a single AST node that works for all mathematical operators.

We no longer need the trig method because we'll handle computations in an AST evaluator later. We then had to update the UnaryExpression and FunctionExpression methods to return AST nodes. Finally, we updated the Primary method to return a Number AST node. We actually didn't have to make too many changes!

We can import this parser into a file named ast-example.js and insert the following contents:

js
Copied! ⭐️
const { Parser } = require('./Parser-AST')

const parser = new Parser()
const ast = parser.parse('1 + 2 * 3')
console.dir(ast, { depth: null }) // log nested objects

We can execute this code using the following command:

shell
Copied! ⭐️
node ast-example

We should see the AST for the math expression, 1 + 2 * 3, displayed in the terminal.

js
Copied! ⭐️
{
  type: 'BinaryExpression',
  operator: '+',
  left: { type: 'Number', value: 1 },
  right: {
    type: 'BinaryExpression',
    operator: '*',
    left: { type: 'Number', value: 2 },
    right: { type: 'Number', value: 3 }
  }
}

Evaluating and Testing the AST

Since our AST is identical to the one we created in Part 8 of this tutorial series, we can reuse our AST evaluator from that tutorial.

Let's create a new file called NodeVisitor.js and add the following contents.

js
Copied! ⭐️
class NodeVisitor {
  visit(node) {
    switch (node.type) {
      case 'Number':
        return this.visitNumber(node);
      case 'BinaryExpression':
        return this.visitBinaryExpression(node);
      case 'UnaryExpression':
        return this.visitUnaryExpression(node);
      case 'Function':
        return this.visitFunction(node);
    }
  }

  visitNumber(node) {
    return node.value;
  }

  visitBinaryExpression(node) {
    switch (node.operator) {
      case '+':
        return this.visit(node.left) + this.visit(node.right);
      case '-':
        return this.visit(node.left) - this.visit(node.right);
      case '*':
        return this.visit(node.left) * this.visit(node.right);
      case '/':
        return this.visit(node.left) / this.visit(node.right);
      case '^':
        return this.visit(node.left) ** this.visit(node.right);
      default:
        throw new Error(`Invalid operation: ${node.operator}`);
    }
  }

  visitUnaryExpression(node) {
    return -this.visit(node.value);
  }

  visitFunction(node) {
    switch (node.name) {
      case 'sin':
        return Math.sin(this.visit(node.value));
      case 'cos':
        return Math.cos(this.visit(node.value));
      case 'tan':
        return Math.tan(this.visit(node.value));
      default:
        throw new Error(`Invalid function: ${node.name}`);
    }
  }
}

module.exports = { NodeVisitor }

We can test our AST evaluator against a variety of math expressions using the tests.js file we wrote in the previous tutorial.

Inside the tests.js file, import the Parser class from the Parser-AST.js file and import the NodeVisitor class from the NodeVisitor.js file.

Then, we'll update the tests to run against an evaluated AST instead of using the parser from the previous tutorial that directly returned a value.

js
Copied! ⭐️
const assert = require('assert').strict
const { Parser } = require('./Parser-AST')
const { NodeVisitor } = require('./NodeVisitor')

const mathTests = [
  ['1', 1],
  [' 2 ', 2],
  ['1 + 2', 3],
  [' 1 + 2 ', 3],
  ['1 + 2 * 3', 7],
  ['(1 + 2) * 3', 9],
  ['5 - 2', 3],
  ['5 - 2 - 1', 2],
  ['12 / 2 / 3', 2],
  ['2 ^ 3 + 1', 9],
  ['-2 ^ 2', -4],
  ['(-2) ^ 2', 4],
  ['-2 ^ 2 + 1', -3],
  ['cos(0) + 3 * -4 / -2 ^ 2', 4]
]

const parser = new Parser()
const visitor = new NodeVisitor()
let result

for (const [expression, expected] of mathTests) {
  try {
    const ast = parser.parse(expression)
    result = visitor.visit(ast)
    assert.equal(result, expected)
  } catch (err) {
    const lines = '-'.repeat(process.stdout.columns)
    console.log(lines)
    console.log(`Expression failed: "${expression}"`)
    console.log(`Expected result: ${expected}`)
    console.log(`Actual result: ${result}`)
    console.log(lines)
    throw err
  }
}

console.log('All tests passed! 🎉')

We can execute this file using the following command:

shell
Copied! ⭐️
node tests

If everything goes well, then all the tests should pass, and we should feel proud of our new recursive descent parser that can build ASTs 😁

Conclusion

In this tutorial, we learned how to make a recursive descent parser that can produce an AST instead of evaluating a result. By generating an AST, it gives the user freedom to choose how they would like to evaluate a math expression. We also learned how to run test cases against various math expressions to make sure the AST was built correctly.

Throughout this math expression parser series, we've learned six different ways of building math expression parsers:

  1. Reverse Polish Notation (RPN) Evaluator
  2. Shunting Yard Algorithm
  3. LR Parser Generator
  4. LL Parser Generator
  5. PEG Parser Generator
  6. Recursive Descent Parser

In the next tutorial, we'll learn the seventh and final way to build a math expression parser. There are definitely other ways, but I only promised seven at the beginning of this tutorial series 😅

Anyways, see you there! Happy coding!