lexer.go 2.6 KB

/*
Package lexer turns an input string into a stream of tokens.

It only supports single-byte character sets like ASCII.
@TODO convert to Unicode / UTF-8.
*/
package lexer

import (
	"fgm/waiig15/token"
)
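
// Lexer holds the scanning state: the whole input string plus two cursors
// into it and the byte currently under examination.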
type Lexer struct {
	input        string
	position     int  // current position in input (points to current char)
	readPosition int  // current reading position in input (after current char)
	ch           byte // current char under examination
}
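
// New returns a Lexer primed on input: readChar() has already run once,
// so the first NextToken() call starts on the first character.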
func New(input string) *Lexer {
	l := &Lexer{input: input}
	l.readChar()
	return l
}

// readChar gives us the next character and advances our position in the
// input string; once the input is exhausted, ch stays 0 (NUL).
func (l *Lexer) readChar() {
	if l.readPosition >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPosition]
	}
	l.position = l.readPosition
	l.readPosition += 1
}
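
// NextToken skips any whitespace, then returns the token starting at the
// current character, leaving the lexer positioned just past that token.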
func (l *Lexer) NextToken() token.Token {
	var tok token.Token

	l.skipWhitespace()

	switch l.ch {
	case '=':
		tok = newToken(token.ASSIGN, l.ch)
	case '(':
		tok = newToken(token.LPAREN, l.ch)
	case ')':
		tok = newToken(token.RPAREN, l.ch)
	case '+':
		tok = newToken(token.PLUS, l.ch)
	case '-':
		tok = newToken(token.MINUS, l.ch)
	case '!':
		tok = newToken(token.BANG, l.ch)
	case '/':
		tok = newToken(token.SLASH, l.ch)
	case '*':
		tok = newToken(token.ASTERISK, l.ch)
	case '<':
		tok = newToken(token.LT, l.ch)
	case '>':
		tok = newToken(token.GT, l.ch)
	case ';':
		tok = newToken(token.SEMICOLON, l.ch)
	case ',':
		tok = newToken(token.COMMA, l.ch)
	case '{':
		tok = newToken(token.LBRACE, l.ch)
	case '}':
		tok = newToken(token.RBRACE, l.ch)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF
	default:
		if isLetter(l.ch) {
			tok.Literal = l.readIdentifier()
			tok.Type = token.LookupIdent(tok.Literal)
			// We already read the next char, so avoid the final readChar().
			return tok
		} else if isDigit(l.ch) {
			tok.Type = token.INT
			tok.Literal = l.readNumber()
			// Ditto.
			return tok
		} else {
			tok = newToken(token.ILLEGAL, l.ch)
		}
	}

	l.readChar()
	return tok
}
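
// newToken builds a single-character token of the given type.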
func newToken(tokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: tokenType, Literal: string(ch)}
}
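
// readIdentifier consumes a run of letters and returns the matching slice
// of the input.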
func (l *Lexer) readIdentifier() string {
	position := l.position
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}
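
// readNumber consumes a contiguous run of decimal digits.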
func (l *Lexer) readNumber() string {
	position := l.position
	for isDigit(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}
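
// isDigit reports whether ch is an ASCII decimal digit.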
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
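
// isLetter reports whether ch may appear in an identifier; '_' counts as
// a letter, so snake_case names lex as single identifiers.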
func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}
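
// skipWhitespace advances past spaces, tabs, and newlines between tokens.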
func (l *Lexer) skipWhitespace() {
	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
		l.readChar()
	}
}
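
For reference, a driver loop over this lexer might look like the minimal sketch below. It assumes this package lives at fgm/waiig15/lexer, alongside the fgm/waiig15/token package imported above; the actual module path is not shown in this file.

package main

import (
	"fmt"

	"fgm/waiig15/lexer" // assumed path, matching the token import above
	"fgm/waiig15/token"
)

func main() {
	// Pull tokens one at a time until the lexer reports EOF.
	l := lexer.New("let add = fn(x, y) { x + y; };")
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		fmt.Printf("%-10v %q\n", tok.Type, tok.Literal)
	}
}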