lexer.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
/*
Package lexer only supports single-byte character sets such as ASCII.

TODO: convert to Unicode / UTF-8.
*/
  5. package lexer
  6. import (
  7. "code.osinet.fr/fgm/waiig15/token"
  8. )
// Lexer implements the lexing mechanism.
//
// It works one byte at a time, so it only handles single-byte character
// sets (see the package comment). A ch value of 0 is the EOF sentinel.
type Lexer struct {
	input        string // full source text being tokenized
	position     int    // current position in input (points to current char)
	readPosition int    // current reading position in input (after current char)
	ch           byte   // current char under examination; 0 once input is exhausted
}
  16. // New returns a new Lexer instance with the first character in the input
  17. // already read.
  18. func New(input string) *Lexer {
  19. l := &Lexer{input: input}
  20. l.readChar()
  21. return l
  22. }
  23. // Give us the next character and advance our position in the input string.
  24. func (l *Lexer) readChar() {
  25. if l.readPosition >= len(l.input) {
  26. l.ch = 0
  27. } else {
  28. l.ch = l.input[l.readPosition]
  29. }
  30. l.position = l.readPosition
  31. l.readPosition++
  32. }
  33. // NextToken advances in the input by one token, skipping all whitespace. It
  34. // returns that token. In case of a lexing error it return an ILLEGAL token.
  35. func (l *Lexer) NextToken() token.Token {
  36. var tok token.Token
  37. l.skipWhitespace()
  38. switch l.ch {
  39. case '=':
  40. if l.peekChar() == '=' {
  41. ch := l.ch
  42. l.readChar()
  43. literal := string(ch) + string(l.ch)
  44. tok = token.Token{Type: token.EQ, Literal: literal}
  45. } else {
  46. tok = newToken(token.ASSIGN, l.ch)
  47. }
  48. case '(':
  49. tok = newToken(token.LPAREN, l.ch)
  50. case ')':
  51. tok = newToken(token.RPAREN, l.ch)
  52. case '+':
  53. tok = newToken(token.PLUS, l.ch)
  54. case '-':
  55. tok = newToken(token.MINUS, l.ch)
  56. case '!':
  57. if l.peekChar() == '=' {
  58. ch := l.ch
  59. l.readChar()
  60. literal := string(ch) + string(l.ch)
  61. tok = token.Token{Type: token.NOT_EQ, Literal: literal}
  62. } else {
  63. tok = newToken(token.BANG, l.ch)
  64. }
  65. case '/':
  66. tok = newToken(token.SLASH, l.ch)
  67. case '*':
  68. tok = newToken(token.ASTERISK, l.ch)
  69. case '<':
  70. tok = newToken(token.LT, l.ch)
  71. case '>':
  72. tok = newToken(token.GT, l.ch)
  73. case ';':
  74. tok = newToken(token.SEMICOLON, l.ch)
  75. case ',':
  76. tok = newToken(token.COMMA, l.ch)
  77. case '{':
  78. tok = newToken(token.LBRACE, l.ch)
  79. case '}':
  80. tok = newToken(token.RBRACE, l.ch)
  81. case 0:
  82. tok.Literal = ""
  83. tok.Type = token.EOF
  84. default:
  85. if isLetter(l.ch) {
  86. tok.Literal = l.readIdentifier()
  87. tok.Type = token.LookupIdent(tok.Literal)
  88. // We already read the next char, so avoid the final readChar().
  89. return tok
  90. } else if isDigit(l.ch) {
  91. tok.Type = token.INT
  92. tok.Literal = l.readNumber()
  93. // Ditto.
  94. return tok
  95. } else {
  96. tok = newToken(token.ILLEGAL, l.ch)
  97. }
  98. }
  99. l.readChar()
  100. return tok
  101. }
  102. func newToken(tokenType token.TokenType, ch byte) token.Token {
  103. return token.Token{Type: tokenType, Literal: string(ch)}
  104. }
  105. func (l *Lexer) peekChar() byte {
  106. if l.readPosition >= len(l.input) {
  107. return 0
  108. }
  109. return l.input[l.readPosition]
  110. }
  111. func (l *Lexer) readIdentifier() string {
  112. position := l.position
  113. for isLetter(l.ch) {
  114. l.readChar()
  115. }
  116. return l.input[position:l.position]
  117. }
  118. func (l *Lexer) readNumber() string {
  119. position := l.position
  120. for isDigit(l.ch) {
  121. l.readChar()
  122. }
  123. return l.input[position:l.position]
  124. }
  125. func isDigit(ch byte) bool {
  126. return '0' <= ch && ch <= '9'
  127. }
  128. func isLetter(ch byte) bool {
  129. return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
  130. }
  131. func (l *Lexer) skipWhitespace() {
  132. for l.ch == ' ' ||
  133. l.ch == '\r' ||
  134. l.ch == '\t' ||
  135. l.ch == '\n' {
  136. l.readChar()
  137. }
  138. }