parser.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. package parser
  2. import (
  3. "fmt"
  4. "strconv"
  5. "code.osinet.fr/fgm/waiig15/ast"
  6. "code.osinet.fr/fgm/waiig15/lexer"
  7. "code.osinet.fr/fgm/waiig15/token"
  8. )
  9. // Precedence constants.
  10. const (
  11. _ int = iota
  12. LOWEST
  13. EQUALS // ==
  14. LESSGREATER // > or <
  15. SUM // +
  16. PRODUCT // *
  17. PREFIX // -X or !X
  18. CALL // myFunction(X)
  19. )
  20. var precedences = map[token.TokenType]int{
  21. token.EQ: EQUALS,
  22. token.NOT_EQ: EQUALS,
  23. token.LT: LESSGREATER,
  24. token.GT: LESSGREATER,
  25. token.PLUS: SUM,
  26. token.MINUS: SUM,
  27. token.SLASH: PRODUCT,
  28. token.ASTERISK: PRODUCT,
  29. }
  30. type (
  31. prefixParseFn func() ast.Expression
  32. infixParseFn func(ast.Expression) ast.Expression
  33. )
  34. // Parser implements the parsing mechanism top-level layer.
  35. type Parser struct {
  36. l *lexer.Lexer
  37. errors []string
  38. curToken token.Token
  39. peekToken token.Token
  40. prefixParseFns map[token.TokenType]prefixParseFn
  41. infixParseFns map[token.TokenType]infixParseFn
  42. }
  43. // New returns a new Parser instance with the first two parser tokens already
  44. // loaded.
  45. func New(l *lexer.Lexer) *Parser {
  46. p := &Parser{
  47. l: l,
  48. errors: []string{},
  49. }
  50. p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
  51. p.registerPrefix(token.IDENT, p.parseIdentifier)
  52. p.registerPrefix(token.INT, p.parseIntegerLiteral)
  53. p.registerPrefix(token.BANG, p.parsePrefixExpression)
  54. p.registerPrefix(token.MINUS, p.parsePrefixExpression)
  55. p.registerPrefix(token.TRUE, p.parseBoolean)
  56. p.registerPrefix(token.FALSE, p.parseBoolean)
  57. p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
  58. p.registerPrefix(token.IF, p.parseIfExpression)
  59. p.infixParseFns = make(map[token.TokenType]infixParseFn)
  60. for _, tok := range []token.TokenType{
  61. token.ASTERISK,
  62. token.EQ,
  63. token.GT,
  64. token.LT,
  65. token.MINUS,
  66. token.NOT_EQ,
  67. token.PLUS,
  68. token.SLASH,
  69. } {
  70. p.registerInfix(tok, p.parseInfixExpression)
  71. }
  72. // Read two tokens, so curToken and peekToken are both set
  73. p.nextToken()
  74. p.nextToken()
  75. return p
  76. }
  77. func (p *Parser) nextToken() {
  78. p.curToken = p.peekToken
  79. p.peekToken = p.l.NextToken()
  80. }
  81. // Is the current token in the parser of the given type ?
  82. func (p *Parser) curTokenIs(t token.TokenType) bool {
  83. return p.curToken.Type == t
  84. }
  85. // Is the next token in the parser of the given type ? Don't consume it.
  86. func (p *Parser) peekTokenIs(t token.TokenType) bool {
  87. return p.peekToken.Type == t
  88. }
  89. // Is the next token in the parser of the given type ? If it is, consume it,
  90. // else don't.
  91. func (p *Parser) expectPeek(t token.TokenType) bool {
  92. if p.peekTokenIs(t) {
  93. p.nextToken()
  94. return true
  95. }
  96. p.peekError(t)
  97. return false
  98. }
  99. // Errors is a getter for Parser.errors.
  100. func (p *Parser) Errors() []string {
  101. return p.errors
  102. }
  103. // Log a mismatch error on the peek token type in the parser instance.
  104. //
  105. // - t is the type of token that was expected
  106. func (p *Parser) peekError(t token.TokenType) {
  107. msg := fmt.Sprintf("expected next token to be %s, got %s instead",
  108. t, p.peekToken.Type)
  109. p.errors = append(p.errors, msg)
  110. }
  111. func (p *Parser) noPrefixParseFnError(t token.TokenType) {
  112. msg := fmt.Sprintf("no prefix parse function for %s found", t)
  113. p.errors = append(p.errors, msg)
  114. }
  115. // ParseProgram is the outermost parsing logic, accumulating statements in a
  116. // Program instance and returning that instance once parsing is done.
  117. func (p *Parser) ParseProgram() *ast.Program {
  118. defer untrace(trace("ParseProgram"))
  119. program := &ast.Program{
  120. Statements: []ast.Statement{},
  121. }
  122. for !p.curTokenIs(token.EOF) {
  123. stmt := p.parseStatement()
  124. if stmt != nil {
  125. program.Statements = append(program.Statements, stmt)
  126. }
  127. p.nextToken()
  128. }
  129. return program
  130. }
  131. func (p *Parser) parseStatement() ast.Statement {
  132. defer untrace(trace("parseStatement"))
  133. switch p.curToken.Type {
  134. case token.LET:
  135. return p.parseLetStatement()
  136. case token.RETURN:
  137. return p.parseReturnStatement()
  138. default:
  139. return p.parseExpressionStatement()
  140. }
  141. }
  142. func (p *Parser) parseLetStatement() *ast.LetStatement {
  143. defer untrace(trace("parseLetStatement"))
  144. stmt := &ast.LetStatement{
  145. Token: p.curToken,
  146. }
  147. // Let statement starts with an IDENT token, so if next token is not an
  148. // IDENT, the next statement cannot be a Let statement.
  149. if !p.expectPeek(token.IDENT) {
  150. return nil
  151. }
  152. stmt.Name = &ast.Identifier{
  153. Token: p.curToken,
  154. Value: p.curToken.Literal,
  155. }
  156. // The previous expectPeek() call fetched the next token, so we should now
  157. // be on the assignment.
  158. if !p.expectPeek(token.ASSIGN) {
  159. return nil
  160. }
  161. // Skip the expression for now, progress to the semicolon terminating the
  162. // statement.
  163. for !p.curTokenIs(token.SEMICOLON) {
  164. p.nextToken()
  165. }
  166. return stmt
  167. }
  168. func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
  169. defer untrace(trace("parseReturnStatement"))
  170. stmt := &ast.ReturnStatement{
  171. Token: p.curToken,
  172. }
  173. // There should be an expression to consume here.
  174. p.nextToken()
  175. // Skip the expression for now, progress to the semicolon terminating the
  176. // statement.
  177. for !p.curTokenIs(token.SEMICOLON) {
  178. p.nextToken()
  179. }
  180. return stmt
  181. }
  182. func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
  183. defer untrace(trace("parseExpressionStatement"))
  184. stmt := &ast.ExpressionStatement{
  185. Token: p.curToken,
  186. }
  187. stmt.Expression = p.parseExpression(LOWEST)
  188. // Semicolons are optional to help use REPL input.
  189. if p.peekTokenIs(token.SEMICOLON) {
  190. p.nextToken()
  191. }
  192. return stmt
  193. }
  194. func (p *Parser) parseExpression(precedence int) ast.Expression {
  195. defer untrace(trace("parseExpression"))
  196. prefix := p.prefixParseFns[p.curToken.Type]
  197. if prefix == nil {
  198. p.noPrefixParseFnError(p.curToken.Type)
  199. return nil
  200. }
  201. leftExp := prefix()
  202. for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
  203. infix := p.infixParseFns[p.peekToken.Type]
  204. if infix == nil {
  205. return leftExp
  206. }
  207. p.nextToken()
  208. leftExp = infix(leftExp)
  209. }
  210. return leftExp
  211. }
  212. // Look forward for the precedence of the next token without advancing.
  213. func (p *Parser) peekPrecedence() int {
  214. if precedence, ok := precedences[p.peekToken.Type]; ok {
  215. return precedence
  216. }
  217. return LOWEST
  218. }
  219. // Return the precedence for the current token without advancing.
  220. func (p *Parser) curPrecedence() int {
  221. if precedence, ok := precedences[p.curToken.Type]; ok {
  222. return precedence
  223. }
  224. return LOWEST
  225. }
  226. // parseIdentifier does not advance the tokens or call nextToken, and this is
  227. // important.
  228. func (p *Parser) parseIdentifier() ast.Expression {
  229. return &ast.Identifier{
  230. Token: p.curToken,
  231. Value: p.curToken.Literal,
  232. }
  233. }
  234. func (p *Parser) parseIntegerLiteral() ast.Expression {
  235. defer untrace(trace("parseIntegerLiteral"))
  236. lit := &ast.IntegerLiteral{
  237. Token: p.curToken,
  238. }
  239. // Base 0 allows straight interpretation of octal 0755 or hex 0xABCD.
  240. value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
  241. if err != nil {
  242. msg := fmt.Sprintf("could not parse %q as integer",
  243. p.curToken.Literal)
  244. p.errors = append(p.errors, msg)
  245. return nil
  246. }
  247. lit.Value = value
  248. return lit
  249. }
  250. func (p *Parser) parsePrefixExpression() ast.Expression {
  251. defer untrace(trace("parsePrefixExpression"))
  252. expression := &ast.PrefixExpression{
  253. Token: p.curToken,
  254. Operator: p.curToken.Literal,
  255. }
  256. // Consume the operator token to progress to the prefixed expression.
  257. p.nextToken()
  258. // The precedence is now that of the prefix operator instead of the lowest.
  259. expression.Right = p.parseExpression(PREFIX)
  260. return expression
  261. }
  262. func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
  263. defer untrace(trace("parseInfixExpression"))
  264. expression := &ast.InfixExpression{
  265. Token: p.curToken,
  266. Operator: p.curToken.Literal,
  267. Left: left,
  268. }
  269. precedence := p.curPrecedence()
  270. p.nextToken()
  271. expression.Right = p.parseExpression(precedence)
  272. return expression
  273. }
  274. func (p *Parser) parseBoolean() ast.Expression {
  275. defer untrace(trace("parseBoolean"))
  276. expression := &ast.Boolean{
  277. Token: p.curToken,
  278. Value: p.curTokenIs(token.TRUE),
  279. }
  280. return expression
  281. }
  282. func (p *Parser) parseIfExpression() ast.Expression {
  283. expression := &ast.IfExpression{
  284. Token: p.curToken,
  285. }
  286. // If expressions need parentheses around the condition. Opening one.
  287. if !p.expectPeek(token.LPAREN) {
  288. return nil
  289. }
  290. p.nextToken()
  291. expression.Condition = p.parseExpression(LOWEST)
  292. // If expressions need parentheses around the condition. Closing one.
  293. if !p.expectPeek(token.RPAREN) {
  294. return nil
  295. }
  296. // Consequences are blocks, no omitted braces as in C/JS/PHP.
  297. if !p.expectPeek(token.LBRACE) {
  298. return nil
  299. }
  300. expression.Consequence = p.parseBlockStatement()
  301. if p.peekTokenIs(token.ELSE) {
  302. // We know it's an ELSE since we just checked, so no expectPeek().
  303. p.nextToken()
  304. // Alternatives are blocks, no omitted braces as in C/JS/PHP.
  305. if !p.expectPeek(token.LBRACE) {
  306. return nil
  307. }
  308. expression.Alternative = p.parseBlockStatement()
  309. }
  310. return expression
  311. }
  312. func (p *Parser) parseBlockStatement() *ast.BlockStatement {
  313. block := &ast.BlockStatement{
  314. Token: p.curToken,
  315. }
  316. block.Statements = []ast.Statement{}
  317. // Consume LBRACE.
  318. p.nextToken()
  319. // Parse until RBRACE or EOF.
  320. for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
  321. stmt := p.parseStatement()
  322. if stmt != nil {
  323. block.Statements = append(block.Statements, stmt)
  324. }
  325. p.nextToken()
  326. }
  327. return block
  328. }
  329. func (p *Parser) parseGroupedExpression() ast.Expression {
  330. p.nextToken()
  331. exp := p.parseExpression(LOWEST)
  332. if !p.expectPeek(token.RPAREN) {
  333. return nil
  334. }
  335. return exp
  336. }
  337. func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
  338. p.prefixParseFns[tokenType] = fn
  339. }
  340. func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
  341. p.infixParseFns[tokenType] = fn
  342. }