package parser import ( "fmt" "strconv" "code.osinet.fr/fgm/waiig15/ast" "code.osinet.fr/fgm/waiig15/lexer" "code.osinet.fr/fgm/waiig15/token" ) // Precedence constants. const ( _ int = iota LOWEST EQUALS // == LESSGREATER // > or < SUM // + PRODUCT // * PREFIX // -X or !X CALL // myFunction(X) ) var precedences = map[token.TokenType]int{ token.EQ: EQUALS, token.NOT_EQ: EQUALS, token.LT: LESSGREATER, token.GT: LESSGREATER, token.PLUS: SUM, token.MINUS: SUM, token.SLASH: PRODUCT, token.ASTERISK: PRODUCT, } type ( prefixParseFn func() ast.Expression infixParseFn func(ast.Expression) ast.Expression ) // Parser implements the parsing mechanism top-level layer. type Parser struct { l *lexer.Lexer errors []string curToken token.Token peekToken token.Token prefixParseFns map[token.TokenType]prefixParseFn infixParseFns map[token.TokenType]infixParseFn } // New returns a new Parser instance with the first two parser tokens already // loaded. func New(l *lexer.Lexer) *Parser { p := &Parser{ l: l, errors: []string{}, } p.prefixParseFns = make(map[token.TokenType]prefixParseFn) p.registerPrefix(token.IDENT, p.parseIdentifier) p.registerPrefix(token.INT, p.parseIntegerLiteral) p.registerPrefix(token.BANG, p.parsePrefixExpression) p.registerPrefix(token.MINUS, p.parsePrefixExpression) p.registerPrefix(token.TRUE, p.parseBoolean) p.registerPrefix(token.FALSE, p.parseBoolean) p.registerPrefix(token.LPAREN, p.parseGroupedExpression) p.registerPrefix(token.IF, p.parseIfExpression) p.infixParseFns = make(map[token.TokenType]infixParseFn) for _, tok := range []token.TokenType{ token.ASTERISK, token.EQ, token.GT, token.LT, token.MINUS, token.NOT_EQ, token.PLUS, token.SLASH, } { p.registerInfix(tok, p.parseInfixExpression) } // Read two tokens, so curToken and peekToken are both set p.nextToken() p.nextToken() return p } func (p *Parser) nextToken() { p.curToken = p.peekToken p.peekToken = p.l.NextToken() } // Is the current token in the parser of the given type ? func (p *Parser) curTokenIs(t token.TokenType) bool { return p.curToken.Type == t } // Is the next token in the parser of the given type ? Don't consume it. func (p *Parser) peekTokenIs(t token.TokenType) bool { return p.peekToken.Type == t } // Is the next token in the parser of the given type ? If it is, consume it, // else don't. func (p *Parser) expectPeek(t token.TokenType) bool { if p.peekTokenIs(t) { p.nextToken() return true } p.peekError(t) return false } // Errors is a getter for Parser.errors. func (p *Parser) Errors() []string { return p.errors } // Log a mismatch error on the peek token type in the parser instance. // // - t is the type of token that was expected func (p *Parser) peekError(t token.TokenType) { msg := fmt.Sprintf("expected next token to be %s, got %s instead", t, p.peekToken.Type) p.errors = append(p.errors, msg) } func (p *Parser) noPrefixParseFnError(t token.TokenType) { msg := fmt.Sprintf("no prefix parse function for %s found", t) p.errors = append(p.errors, msg) } // ParseProgram is the outermost parsing logic, accumulating statements in a // Program instance and returning that instance once parsing is done. func (p *Parser) ParseProgram() *ast.Program { defer untrace(trace("ParseProgram")) program := &ast.Program{ Statements: []ast.Statement{}, } for !p.curTokenIs(token.EOF) { stmt := p.parseStatement() if stmt != nil { program.Statements = append(program.Statements, stmt) } p.nextToken() } return program } func (p *Parser) parseStatement() ast.Statement { defer untrace(trace("parseStatement")) switch p.curToken.Type { case token.LET: return p.parseLetStatement() case token.RETURN: return p.parseReturnStatement() default: return p.parseExpressionStatement() } } func (p *Parser) parseLetStatement() *ast.LetStatement { defer untrace(trace("parseLetStatement")) stmt := &ast.LetStatement{ Token: p.curToken, } // Let statement starts with an IDENT token, so if next token is not an // IDENT, the next statement cannot be a Let statement. if !p.expectPeek(token.IDENT) { return nil } stmt.Name = &ast.Identifier{ Token: p.curToken, Value: p.curToken.Literal, } // The previous expectPeek() call fetched the next token, so we should now // be on the assignment. if !p.expectPeek(token.ASSIGN) { return nil } // Skip the expression for now, progress to the semicolon terminating the // statement. for !p.curTokenIs(token.SEMICOLON) { p.nextToken() } return stmt } func (p *Parser) parseReturnStatement() *ast.ReturnStatement { defer untrace(trace("parseReturnStatement")) stmt := &ast.ReturnStatement{ Token: p.curToken, } // There should be an expression to consume here. p.nextToken() // Skip the expression for now, progress to the semicolon terminating the // statement. for !p.curTokenIs(token.SEMICOLON) { p.nextToken() } return stmt } func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement { defer untrace(trace("parseExpressionStatement")) stmt := &ast.ExpressionStatement{ Token: p.curToken, } stmt.Expression = p.parseExpression(LOWEST) // Semicolons are optional to help use REPL input. if p.peekTokenIs(token.SEMICOLON) { p.nextToken() } return stmt } func (p *Parser) parseExpression(precedence int) ast.Expression { defer untrace(trace("parseExpression")) prefix := p.prefixParseFns[p.curToken.Type] if prefix == nil { p.noPrefixParseFnError(p.curToken.Type) return nil } leftExp := prefix() for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() { infix := p.infixParseFns[p.peekToken.Type] if infix == nil { return leftExp } p.nextToken() leftExp = infix(leftExp) } return leftExp } // Look forward for the precedence of the next token without advancing. func (p *Parser) peekPrecedence() int { if precedence, ok := precedences[p.peekToken.Type]; ok { return precedence } return LOWEST } // Return the precedence for the current token without advancing. func (p *Parser) curPrecedence() int { if precedence, ok := precedences[p.curToken.Type]; ok { return precedence } return LOWEST } // parseIdentifier does not advance the tokens or call nextToken, and this is // important. func (p *Parser) parseIdentifier() ast.Expression { return &ast.Identifier{ Token: p.curToken, Value: p.curToken.Literal, } } func (p *Parser) parseIntegerLiteral() ast.Expression { defer untrace(trace("parseIntegerLiteral")) lit := &ast.IntegerLiteral{ Token: p.curToken, } // Base 0 allows straight interpretation of octal 0755 or hex 0xABCD. value, err := strconv.ParseInt(p.curToken.Literal, 0, 64) if err != nil { msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal) p.errors = append(p.errors, msg) return nil } lit.Value = value return lit } func (p *Parser) parsePrefixExpression() ast.Expression { defer untrace(trace("parsePrefixExpression")) expression := &ast.PrefixExpression{ Token: p.curToken, Operator: p.curToken.Literal, } // Consume the operator token to progress to the prefixed expression. p.nextToken() // The precedence is now that of the prefix operator instead of the lowest. expression.Right = p.parseExpression(PREFIX) return expression } func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression { defer untrace(trace("parseInfixExpression")) expression := &ast.InfixExpression{ Token: p.curToken, Operator: p.curToken.Literal, Left: left, } precedence := p.curPrecedence() p.nextToken() expression.Right = p.parseExpression(precedence) return expression } func (p *Parser) parseBoolean() ast.Expression { defer untrace(trace("parseBoolean")) expression := &ast.Boolean{ Token: p.curToken, Value: p.curTokenIs(token.TRUE), } return expression } func (p *Parser) parseIfExpression() ast.Expression { expression := &ast.IfExpression{ Token: p.curToken, } // If expressions need parentheses around the condition. Opening one. if !p.expectPeek(token.LPAREN) { return nil } p.nextToken() expression.Condition = p.parseExpression(LOWEST) // If expressions need parentheses around the condition. Closing one. if !p.expectPeek(token.RPAREN) { return nil } // Consequences are blocks, no omitted braces as in C/JS/PHP. if !p.expectPeek(token.LBRACE) { return nil } expression.Consequence = p.parseBlockStatement() if p.peekTokenIs(token.ELSE) { // We know it's an ELSE since we just checked, so no expectPeek(). p.nextToken() // Alternatives are blocks, no omitted braces as in C/JS/PHP. if !p.expectPeek(token.LBRACE) { return nil } expression.Alternative = p.parseBlockStatement() } return expression } func (p *Parser) parseBlockStatement() *ast.BlockStatement { block := &ast.BlockStatement{ Token: p.curToken, } block.Statements = []ast.Statement{} // Consume LBRACE. p.nextToken() // Parse until RBRACE or EOF. for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) { stmt := p.parseStatement() if stmt != nil { block.Statements = append(block.Statements, stmt) } p.nextToken() } return block } func (p *Parser) parseGroupedExpression() ast.Expression { p.nextToken() exp := p.parseExpression(LOWEST) if !p.expectPeek(token.RPAREN) { return nil } return exp } func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) { p.prefixParseFns[tokenType] = fn } func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) { p.infixParseFns[tokenType] = fn }