lexer.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. /*
  2. Lexer only supports single-byte character sets like ASCII.
  3. @TODO convert to Unicode / UTF-8.
  4. */
  5. package lexer
  6. import (
  7. "fgm/waiig15/token"
  8. )
// Lexer walks an input string one byte at a time and produces tokens.
// It only supports single-byte character sets like ASCII (see the
// file-level @TODO about Unicode/UTF-8).
type Lexer struct {
	input        string
	position     int  // current position in input (points to current char)
	readPosition int  // current reading position in input (after current char)
	ch           byte // current char under examination; 0 once input is exhausted
}
  15. func New(input string) *Lexer {
  16. l := &Lexer{input: input}
  17. l.readChar()
  18. return l
  19. }
  20. // Give us the next character and advance our position in the input string.
  21. func (l *Lexer) readChar() {
  22. if l.readPosition >= len(l.input) {
  23. l.ch = 0
  24. } else {
  25. l.ch = l.input[l.readPosition]
  26. }
  27. l.position = l.readPosition
  28. l.readPosition += 1
  29. }
  30. func (l *Lexer) NextToken() token.Token {
  31. var tok token.Token
  32. l.skipWhitespace()
  33. switch l.ch {
  34. case '=':
  35. tok = newToken(token.ASSIGN, l.ch)
  36. case ';':
  37. tok = newToken(token.SEMICOLON, l.ch)
  38. case '(':
  39. tok = newToken(token.LPAREN, l.ch)
  40. case ')':
  41. tok = newToken(token.RPAREN, l.ch)
  42. case ',':
  43. tok = newToken(token.COMMA, l.ch)
  44. case '+':
  45. tok = newToken(token.PLUS, l.ch)
  46. case '{':
  47. tok = newToken(token.LBRACE, l.ch)
  48. case '}':
  49. tok = newToken(token.RBRACE, l.ch)
  50. case 0:
  51. tok.Literal = ""
  52. tok.Type = token.EOF
  53. default:
  54. if isLetter(l.ch) {
  55. tok.Literal = l.readIdentifier()
  56. tok.Type = token.LookupIdent(tok.Literal)
  57. // We already read the next char, so avoid the final readChar().
  58. return tok
  59. } else if isDigit(l.ch) {
  60. tok.Type = token.INT
  61. tok.Literal = l.readNumber()
  62. // Ditto.
  63. return tok
  64. } else {
  65. tok = newToken(token.ILLEGAL, l.ch)
  66. }
  67. }
  68. l.readChar()
  69. return tok
  70. }
  71. func newToken(tokenType token.TokenType, ch byte) token.Token {
  72. return token.Token{Type: tokenType, Literal: string(ch)}
  73. }
  74. func (l *Lexer) readIdentifier() string {
  75. position := l.position
  76. for isLetter(l.ch) {
  77. l.readChar()
  78. }
  79. return l.input[position:l.position]
  80. }
  81. func (l *Lexer) readNumber() string {
  82. position := l.position
  83. for isDigit(l.ch) {
  84. l.readChar()
  85. }
  86. return l.input[position:l.position]
  87. }
  88. func isDigit(ch byte) bool {
  89. return '0' <= ch && ch <= '9'
  90. }
  91. func isLetter(ch byte) bool {
  92. return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
  93. }
  94. func (l *Lexer) skipWhitespace() {
  95. for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
  96. l.readChar()
  97. }
  98. }