package parser

import (
	"fmt"
	"strconv"

	"code.osinet.fr/fgm/waiig15/ast"
	"code.osinet.fr/fgm/waiig15/lexer"
	"code.osinet.fr/fgm/waiig15/token"
)

// Operator precedence levels, from lowest (LOWEST) to highest (CALL) binding
// power.
const (
	_ int = iota
	LOWEST
	EQUALS      // ==
	LESSGREATER // > or <
	SUM         // +
	PRODUCT     // *
	PREFIX      // -X or !X
	CALL        // myFunction(X)
)

var precedences = map[token.TokenType]int{
	token.EQ:       EQUALS,
	token.NOT_EQ:   EQUALS,
	token.LT:       LESSGREATER,
	token.GT:       LESSGREATER,
	token.PLUS:     SUM,
	token.MINUS:    SUM,
	token.SLASH:    PRODUCT,
	token.ASTERISK: PRODUCT,
}

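// Pratt parser function types: a prefixParseFn parses an expression starting
// at the current token, while an infixParseFn receives the already-parsed
// left-hand side and parses the rest of the expression.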
type (
	prefixParseFn func() ast.Expression
	infixParseFn  func(ast.Expression) ast.Expression
)

// Parser implements the top-level layer of the parsing mechanism.
type Parser struct {
	l      *lexer.Lexer
	errors []string

	curToken  token.Token
	peekToken token.Token

	prefixParseFns map[token.TokenType]prefixParseFn
	infixParseFns  map[token.TokenType]infixParseFn
}

// New returns a new Parser instance with the first two parser tokens already
// loaded.
func New(l *lexer.Lexer) *Parser {
	p := &Parser{
		l:      l,
		errors: []string{},
	}

	p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
	p.registerPrefix(token.IDENT, p.parseIdentifier)
	p.registerPrefix(token.INT, p.parseIntegerLiteral)
	p.registerPrefix(token.BANG, p.parsePrefixExpression)
	p.registerPrefix(token.MINUS, p.parsePrefixExpression)
	p.registerPrefix(token.TRUE, p.parseBoolean)
	p.registerPrefix(token.FALSE, p.parseBoolean)
	p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
	p.registerPrefix(token.IF, p.parseIfExpression)

	p.infixParseFns = make(map[token.TokenType]infixParseFn)
	for _, tok := range []token.TokenType{
		token.ASTERISK,
		token.EQ,
		token.GT,
		token.LT,
		token.MINUS,
		token.NOT_EQ,
		token.PLUS,
		token.SLASH,
	} {
		p.registerInfix(tok, p.parseInfixExpression)
	}

	// Read two tokens, so curToken and peekToken are both set.
	p.nextToken()
	p.nextToken()

	return p
}

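// nextToken advances the token window: curToken takes the previous peekToken,
// and peekToken is read from the lexer.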
func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.l.NextToken()
}

// Is the current token in the parser of the given type?
func (p *Parser) curTokenIs(t token.TokenType) bool {
	return p.curToken.Type == t
}

// Is the next token in the parser of the given type? Don't consume it.
func (p *Parser) peekTokenIs(t token.TokenType) bool {
	return p.peekToken.Type == t
}

// Is the next token in the parser of the given type? If it is, consume it;
// otherwise leave it in place and record a peekError.
func (p *Parser) expectPeek(t token.TokenType) bool {
	if p.peekTokenIs(t) {
		p.nextToken()
		return true
	}

	p.peekError(t)
	return false
}

// Errors is a getter for Parser.errors.
func (p *Parser) Errors() []string {
	return p.errors
}

// Log a mismatch error on the peek token type in the parser instance.
//
//   - t is the type of token that was expected.
func (p *Parser) peekError(t token.TokenType) {
	msg := fmt.Sprintf("expected next token to be %s, got %s instead",
		t, p.peekToken.Type)
	p.errors = append(p.errors, msg)
}

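// Log an error when no prefix parse function is registered for a token type.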
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
	msg := fmt.Sprintf("no prefix parse function for %s found", t)
	p.errors = append(p.errors, msg)
}

// ParseProgram is the outermost parsing logic, accumulating statements in a
// Program instance and returning that instance once parsing is done.
func (p *Parser) ParseProgram() *ast.Program {
	defer untrace(trace("ParseProgram"))

	program := &ast.Program{
		Statements: []ast.Statement{},
	}

	for !p.curTokenIs(token.EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
		p.nextToken()
	}

	return program
}

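// Dispatch on the current token type: let and return statements have dedicated
// parsers, everything else is parsed as an expression statement.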
func (p *Parser) parseStatement() ast.Statement {
	defer untrace(trace("parseStatement"))

	switch p.curToken.Type {
	case token.LET:
		return p.parseLetStatement()
	case token.RETURN:
		return p.parseReturnStatement()
	default:
		return p.parseExpressionStatement()
	}
}

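// Parse a statement of the form: let <identifier> = <expression>;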
func (p *Parser) parseLetStatement() *ast.LetStatement {
	defer untrace(trace("parseLetStatement"))

	stmt := &ast.LetStatement{
		Token: p.curToken,
	}

	// The LET token must be followed by an IDENT naming the binding; if it is
	// not, this cannot be a valid let statement.
	if !p.expectPeek(token.IDENT) {
		return nil
	}

	stmt.Name = &ast.Identifier{
		Token: p.curToken,
		Value: p.curToken.Literal,
	}

	// expectPeek() advanced onto the IDENT, so the next token should be the
	// assignment operator.
	if !p.expectPeek(token.ASSIGN) {
		return nil
	}

	// Skip the expression for now, progress to the semicolon terminating the
	// statement.
	for !p.curTokenIs(token.SEMICOLON) {
		p.nextToken()
	}

	return stmt
}

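// Parse a statement of the form: return <expression>;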
func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
	defer untrace(trace("parseReturnStatement"))

	stmt := &ast.ReturnStatement{
		Token: p.curToken,
	}

	// There should be an expression to consume here.
	p.nextToken()

	// Skip the expression for now, progress to the semicolon terminating the
	// statement.
	for !p.curTokenIs(token.SEMICOLON) {
		p.nextToken()
	}

	return stmt
}

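// Parse a bare expression used as a statement, e.g. `x + 10;` on a line of its
// own.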
func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
	defer untrace(trace("parseExpressionStatement"))

	stmt := &ast.ExpressionStatement{
		Token: p.curToken,
	}

	stmt.Expression = p.parseExpression(LOWEST)

	// The trailing semicolon is optional, which makes REPL input easier.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}

	return stmt
}

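// parseExpression is the core of the Pratt parser: parse a prefix expression
// for the current token, then keep folding the result into infix expressions
// as long as the next operator binds more tightly than the given precedence.
// For `1 + 2 * 3`, parsing the right-hand side of `+` consumes `2 * 3` first,
// yielding (1 + (2 * 3)).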
func (p *Parser) parseExpression(precedence int) ast.Expression {
	defer untrace(trace("parseExpression"))

	prefix := p.prefixParseFns[p.curToken.Type]
	if prefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}
	leftExp := prefix()

	for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
		infix := p.infixParseFns[p.peekToken.Type]
		if infix == nil {
			return leftExp
		}

		p.nextToken()
		leftExp = infix(leftExp)
	}

	return leftExp
}

// Look forward for the precedence of the next token without advancing;
// unregistered token types default to LOWEST.
func (p *Parser) peekPrecedence() int {
	if precedence, ok := precedences[p.peekToken.Type]; ok {
		return precedence
	}

	return LOWEST
}

// Return the precedence for the current token without advancing; unregistered
// token types default to LOWEST.
func (p *Parser) curPrecedence() int {
	if precedence, ok := precedences[p.curToken.Type]; ok {
		return precedence
	}

	return LOWEST
}

// parseIdentifier does not advance the tokens or call nextToken, and this is
// important: parse functions must return with curToken still on the last token
// of the expression they parsed, and an identifier is a single token.
func (p *Parser) parseIdentifier() ast.Expression {
	return &ast.Identifier{
		Token: p.curToken,
		Value: p.curToken.Literal,
	}
}

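// Parse the current INT token into an *ast.IntegerLiteral, logging an error if
// the literal cannot be converted to an int64.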
func (p *Parser) parseIntegerLiteral() ast.Expression {
	defer untrace(trace("parseIntegerLiteral"))

	lit := &ast.IntegerLiteral{
		Token: p.curToken,
	}

	// Base 0 allows straight interpretation of octal 0755 or hex 0xABCD.
	value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
	if err != nil {
		msg := fmt.Sprintf("could not parse %q as integer",
			p.curToken.Literal)
		p.errors = append(p.errors, msg)
		return nil
	}

	lit.Value = value
	return lit
}

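// Parse a prefix expression such as `!ok` or `-5`: the operator is the current
// token, and its operand is parsed with PREFIX precedence.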
func (p *Parser) parsePrefixExpression() ast.Expression {
	defer untrace(trace("parsePrefixExpression"))

	expression := &ast.PrefixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
	}

	// Consume the operator token to progress to the prefixed expression.
	p.nextToken()

	// The precedence is now that of the prefix operator instead of the lowest.
	expression.Right = p.parseExpression(PREFIX)

	return expression
}

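// Parse an infix expression such as `a + b`: the operator is the current
// token, left has already been parsed, and the right-hand side is parsed with
// the operator's own precedence, making operators left-associative.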
func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
	defer untrace(trace("parseInfixExpression"))

	expression := &ast.InfixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
		Left:     left,
	}

	// Capture the operator precedence before advancing past the operator.
	precedence := p.curPrecedence()
	p.nextToken()
	expression.Right = p.parseExpression(precedence)

	return expression
}

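// Parse a `true` or `false` token into an *ast.Boolean literal.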
func (p *Parser) parseBoolean() ast.Expression {
	defer untrace(trace("parseBoolean"))

	expression := &ast.Boolean{
		Token: p.curToken,
		Value: p.curTokenIs(token.TRUE),
	}

	return expression
}

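// Parse `if (<condition>) { <consequence> } else { <alternative> }`, where the
// else branch is optional.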
func (p *Parser) parseIfExpression() ast.Expression {
	expression := &ast.IfExpression{
		Token: p.curToken,
	}

	// If expressions require parentheses around the condition: expect the
	// opening one...
	if !p.expectPeek(token.LPAREN) {
		return nil
	}

	p.nextToken()
	expression.Condition = p.parseExpression(LOWEST)

	// ...and the closing one.
	if !p.expectPeek(token.RPAREN) {
		return nil
	}

	// The consequence is a block: braces cannot be omitted as in C/JS/PHP.
	if !p.expectPeek(token.LBRACE) {
		return nil
	}
	expression.Consequence = p.parseBlockStatement()

	if p.peekTokenIs(token.ELSE) {
		// We know it is an ELSE since we just checked, so no expectPeek().
		p.nextToken()

		// The alternative is a block too: braces cannot be omitted either.
		if !p.expectPeek(token.LBRACE) {
			return nil
		}
		expression.Alternative = p.parseBlockStatement()
	}

	return expression
}

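// Parse a brace-delimited sequence of statements.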
func (p *Parser) parseBlockStatement() *ast.BlockStatement {
	block := &ast.BlockStatement{
		Token: p.curToken,
	}
	block.Statements = []ast.Statement{}

	// Consume LBRACE.
	p.nextToken()

	// Parse until RBRACE or EOF.
	for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			block.Statements = append(block.Statements, stmt)
		}
		p.nextToken()
	}

	return block
}

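// Parse a parenthesized expression: the parentheses only force precedence and
// do not appear as a node in the AST.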
func (p *Parser) parseGroupedExpression() ast.Expression {
	p.nextToken()

	exp := p.parseExpression(LOWEST)

	if !p.expectPeek(token.RPAREN) {
		return nil
	}

	return exp
}

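// Associate a prefix parse function with a token type.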
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
	p.prefixParseFns[tokenType] = fn
}

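// Associate an infix parse function with a token type.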
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
	p.infixParseFns[tokenType] = fn
}