lexer.go

/*
Lexer only supports single-byte character sets like ASCII.
@TODO convert to Unicode / UTF-8.
*/
package lexer

import (
    "fgm/waiig15/token"
)

// Lexer holds the scanning state: the full input plus two cursors into it.
type Lexer struct {
    input        string
    position     int  // current position in input (points to current char)
    readPosition int  // current reading position in input (after current char)
    ch           byte // current char under examination
}
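
// New returns a Lexer primed on the first character of input. A minimal
// usage sketch (the input literal and the fmt call are illustrative, not
// part of this package):
//
//     l := New("let five = 5;")
//     for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
//         fmt.Printf("%+v\n", tok)
//     }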
func New(input string) *Lexer {
    l := &Lexer{input: input}
    l.readChar()
    return l
}

// readChar gives us the next character and advances our position in the
// input string.
func (l *Lexer) readChar() {
    if l.readPosition >= len(l.input) {
        l.ch = 0 // NUL byte: we are past the end of the input
    } else {
        l.ch = l.input[l.readPosition]
    }
    l.position = l.readPosition
    l.readPosition += 1
}
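
// For the input "ab", successive readChar calls move the two cursors as
// follows (derived directly from the code above):
//
//     ch='a' position=0 readPosition=1
//     ch='b' position=1 readPosition=2
//     ch=0   position=2 readPosition=3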

// NextToken scans and returns the next token in the input, advancing the
// lexer past it.
func (l *Lexer) NextToken() token.Token {
    var tok token.Token

    l.skipWhitespace()

    switch l.ch {
    case '=':
        if l.peekChar() == '=' {
            ch := l.ch
            l.readChar()
            literal := string(ch) + string(l.ch)
            tok = token.Token{Type: token.EQ, Literal: literal}
        } else {
            tok = newToken(token.ASSIGN, l.ch)
        }
    case '(':
        tok = newToken(token.LPAREN, l.ch)
    case ')':
        tok = newToken(token.RPAREN, l.ch)
    case '+':
        tok = newToken(token.PLUS, l.ch)
    case '-':
        tok = newToken(token.MINUS, l.ch)
    case '!':
        if l.peekChar() == '=' {
            ch := l.ch
            l.readChar()
            literal := string(ch) + string(l.ch)
            tok = token.Token{Type: token.NOT_EQ, Literal: literal}
        } else {
            tok = newToken(token.BANG, l.ch)
        }
    case '/':
        tok = newToken(token.SLASH, l.ch)
    case '*':
        tok = newToken(token.ASTERISK, l.ch)
    case '<':
        tok = newToken(token.LT, l.ch)
    case '>':
        tok = newToken(token.GT, l.ch)
    case ';':
        tok = newToken(token.SEMICOLON, l.ch)
    case ',':
        tok = newToken(token.COMMA, l.ch)
    case '{':
        tok = newToken(token.LBRACE, l.ch)
    case '}':
        tok = newToken(token.RBRACE, l.ch)
    case 0:
        tok.Literal = ""
        tok.Type = token.EOF
    default:
        if isLetter(l.ch) {
            tok.Literal = l.readIdentifier()
            tok.Type = token.LookupIdent(tok.Literal)
            // We already read the next char, so avoid the final readChar().
            return tok
        } else if isDigit(l.ch) {
            tok.Type = token.INT
            tok.Literal = l.readNumber()
            // Ditto.
            return tok
        } else {
            tok = newToken(token.ILLEGAL, l.ch)
        }
    }

    l.readChar()
    return tok
}
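
// As a concrete example, for the input "ten != 9;" successive NextToken
// calls yield the stream below (assuming token.LookupIdent falls back to
// an identifier type such as token.IDENT for non-keywords):
//
//     IDENT "ten", NOT_EQ "!=", INT "9", SEMICOLON ";", EOF ""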

// newToken builds a token from a single character.
func newToken(tokenType token.TokenType, ch byte) token.Token {
    return token.Token{Type: tokenType, Literal: string(ch)}
}

// peekChar returns the upcoming character without consuming it, which lets
// NextToken recognize the two-character operators == and !=.
func (l *Lexer) peekChar() byte {
    if l.readPosition >= len(l.input) {
        return 0
    } else {
        return l.input[l.readPosition]
    }
}

// readIdentifier consumes a run of letters and returns it as a slice of
// the input.
func (l *Lexer) readIdentifier() string {
    position := l.position
    for isLetter(l.ch) {
        l.readChar()
    }
    return l.input[position:l.position]
}

// readNumber consumes a run of digits; only plain integers are supported.
func (l *Lexer) readNumber() string {
    position := l.position
    for isDigit(l.ch) {
        l.readChar()
    }
    return l.input[position:l.position]
}

func isDigit(ch byte) bool {
    return '0' <= ch && ch <= '9'
}

// isLetter treats '_' as a letter, so identifiers like foo_bar are legal.
func isLetter(ch byte) bool {
    return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

func (l *Lexer) skipWhitespace() {
    for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
        l.readChar()
    }
}
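
// A table-driven test for this lexer could live in a hypothetical
// lexer_test.go in the same package, along these lines:
//
//     func TestNextToken(t *testing.T) {
//         input := "=+(){},;"
//         tests := []struct {
//             expectedType    token.TokenType
//             expectedLiteral string
//         }{
//             {token.ASSIGN, "="},
//             {token.PLUS, "+"},
//             {token.LPAREN, "("},
//             {token.RPAREN, ")"},
//             {token.LBRACE, "{"},
//             {token.RBRACE, "}"},
//             {token.COMMA, ","},
//             {token.SEMICOLON, ";"},
//             {token.EOF, ""},
//         }
//         l := New(input)
//         for i, tt := range tests {
//             tok := l.NextToken()
//             if tok.Type != tt.expectedType || tok.Literal != tt.expectedLiteral {
//                 t.Fatalf("tests[%d]: got %q %q", i, tok.Type, tok.Literal)
//             }
//         }
//     }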