Skip to main content

Copy of Simple but Powerful Pratt Parsing from Matklad

· 604 min read

This is a copy of [Simple but Powerful Pratt Parsing from Matklad's blog with no modifications at all.

I am copying this bolg simply because I really love this blog, and I want to show case how gocire works with lsp can bring ide features to a blog.

Expand to view code
use std::{fmt, io::BufRead};

enum S {
    Atom(char),
    Cons(char, Vec<S>),
}

impl fmt::Display for S {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            S::Atom(i: &char) => write!(f, "{}", i),
            S::Cons(head: &char, rest: &Vec<S>) => {
                write!(f, "({}", head)?;
                for s: &S in rest {
                    write!(f, " {}", s)?
                }
                write!(f, ")")
            }
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Token {
    Atom(char),
    Op(char),
    Eof,
}

struct Lexer {
    tokens: Vec<Token>,
}

impl Lexer {
    fn new(input: &str) -> Lexer {
        let mut tokens: Vec<Token> = input &str
            .chars() Chars<'_>
            .filter(|it: &char| !it.is_ascii_whitespace()) impl Iterator<Item = char>
            .map(|c: char| match c {
                '0'..='9'
                | 'a'..='z' | 'A'..='Z' => Token::Atom(c),
                _ => Token::Op(c),
            }) impl Iterator<Item = Token>
            .collect::<Vec<_ = Token>>();
        tokens.reverse();
        Lexer { tokens }
    }

    fn next(&mut self) -> Token {
        self.tokens.pop().unwrap_or(default: Token::Eof)
    }
    fn peek(&mut self) -> Token {
        self.tokens.last().copied().unwrap_or(default: Token::Eof)
    }
}

fn expr(input: &str) -> S {
    let mut lexer: Lexer = Lexer::new(input);
    expr_bp(&mut lexer, min_bp: 0)
}

fn expr_bp(lexer: &mut Lexer, min_bp: u8) -> S {
    let mut lhs: S = match lexer.next() {
        Token::Atom(it: char) => S::Atom(it),
        Token::Op('(') => {
            let lhs: S = expr_bp(lexer, min_bp: 0);
            assert_eq!(lexer.next(), Token::Op(')'));
            lhs
        }
        Token::Op(op: char) => {
            let ((), r_bp: u8) = prefix_binding_power(op);
            let rhs: S = expr_bp(lexer, min_bp: r_bp);
            S::Cons(op, vec![rhs])
        }
        t: Token => panic!("bad token: {:?}", t),
    };

    loop {
        let op: char = match lexer.peek() {
            Token::Eof => break,
            Token::Op(op: char) => op,
            t: Token => panic!("bad token: {:?}", t),
        };

        if let Some((l_bp: u8, ())) = postfix_binding_power(op) {
            if l_bp < min_bp {
                break;
            }
            lexer.next();

            lhs = if op == '[' {
                let rhs: S = expr_bp(lexer, min_bp: 0);
                assert_eq!(lexer.next(), Token::Op(']'));
                S::Cons(op, vec![lhs, rhs])
            } else {
                S::Cons(op, vec![lhs])
            };
            continue;
        }

        if let Some((l_bp: u8, r_bp: u8)) = infix_binding_power(op) {
            if l_bp < min_bp {
                break;
            }
            lexer.next();

            lhs = if op == '?' {
                let mhs: S = expr_bp(lexer, min_bp: 0);
                assert_eq!(lexer.next(), Token::Op(':'));
                let rhs: S = expr_bp(lexer, min_bp: r_bp);
                S::Cons(op, vec![lhs, mhs, rhs])
            } else {
                let rhs: S = expr_bp(lexer, min_bp: r_bp);
                S::Cons(op, vec![lhs, rhs])
            };
            continue;
        }

        break;
    } loop

    lhs
} fn expr_bp

fn prefix_binding_power(op: char) -> ((), u8) {
    match op {
        '+' | '-' => ((), 9),
        _ => panic!("bad op: {:?}", op),
    }
}

fn postfix_binding_power(op: char) -> Option<(u8, ())> {
    let res: (u8, ()) = match op {
        '!' => (11, ()),
        '[' => (11, ()),
        _ => return None,
    };
    Some(res)
}

fn infix_binding_power(op: char) -> Option<(u8, u8)> {
    let res: (u8, u8) = match op {
        '=' => (2, 1),
        '?' => (4, 3),
        '+' | '-' => (5, 6),
        '*' | '/' => (7, 8),
        '.' => (14, 13),
        _ => return None,
    };
    Some(res)
}

#[test]
fn tests() {
    let s: S = expr(input: "1");
    assert_eq!(s.to_string(), "1");

    let s: S = expr(input: "1 + 2 * 3");
    assert_eq!(s.to_string(), "(+ 1 (* 2 3))");

    let s: S = expr(input: "a + b * c * d + e");
    assert_eq!(s.to_string(), "(+ (+ a (* (* b c) d)) e)");

    let s: S = expr(input: "f . g . h");
    assert_eq!(s.to_string(), "(. f (. g h))");

    let s: S = expr(input: " 1 + 2 + f . g . h * 3 * 4");
    assert_eq!(
        s.to_string(),
        "(+ (+ 1 2) (* (* (. f (. g h)) 3) 4))",
    );

    let s: S = expr(input: "--1 * 2");
    assert_eq!(s.to_string(), "(* (- (- 1)) 2)");

    let s: S = expr(input: "--f . g");
    assert_eq!(s.to_string(), "(- (- (. f g)))");

    let s: S = expr(input: "-9!");
    assert_eq!(s.to_string(), "(- (! 9))");

    let s: S = expr(input: "f . g !");
    assert_eq!(s.to_string(), "(! (. f g))");

    let s: S = expr(input: "(((0)))");
    assert_eq!(s.to_string(), "0");

    let s: S = expr(input: "x[0][1]");
    assert_eq!(s.to_string(), "([ ([ x 0) 1)");

    let s: S = expr(
        input: "a ? b :
         c ? d
         : e",
    );
    assert_eq!(s.to_string(), "(? a b (? c d e))");

    let s: S = expr(input: "a = 0 ? b : c = d");
    assert_eq!(s.to_string(), "(= a (= (? 0 b c) d))")
} fn tests

fn main() {
    for line: Result<String, Error> in std::io::stdin().lock().lines() {
        let line: String = line.unwrap();
        let s: S = expr(input: &line);
        println!("{}", s)
    }
}