lexer.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
/*
Package lexer only supports single-byte character sets such as ASCII.

TODO: convert to Unicode / UTF-8.
*/
  5. package lexer
  6. import (
  7. "code.osinet.fr/fgm/waiig15/token"
  8. )
// Lexer implements the lexing mechanism.
type Lexer struct {
	input        string // source text being tokenized (single-byte charset; see package comment)
	position     int    // current position in input (points to current char)
	readPosition int    // current reading position in input (after current char)
	ch           byte   // current char under examination; 0 means end of input
}
  16. // New returns a new Lexer instance with the first character in the input
  17. // already read.
  18. func New(input string) *Lexer {
  19. l := &Lexer{input: input}
  20. l.readChar()
  21. return l
  22. }
  23. // NextToken advances in the input by one token, skipping all whitespace. It
  24. // returns that token. In case of a lexing error it return an ILLEGAL token.
  25. func (l *Lexer) NextToken() token.Token {
  26. var tok token.Token
  27. l.skipWhitespace()
  28. switch l.ch {
  29. case '=':
  30. if l.peekChar() == '=' {
  31. ch := l.ch
  32. l.readChar()
  33. literal := string(ch) + string(l.ch)
  34. tok = token.Token{Type: token.EQ, Literal: literal}
  35. } else {
  36. tok = newToken(token.ASSIGN, l.ch)
  37. }
  38. case '+':
  39. tok = newToken(token.PLUS, l.ch)
  40. case '-':
  41. tok = newToken(token.MINUS, l.ch)
  42. case '!':
  43. if l.peekChar() == '=' {
  44. ch := l.ch
  45. l.readChar()
  46. literal := string(ch) + string(l.ch)
  47. tok = token.Token{Type: token.NOT_EQ, Literal: literal}
  48. } else {
  49. tok = newToken(token.BANG, l.ch)
  50. }
  51. case '/':
  52. tok = newToken(token.SLASH, l.ch)
  53. case '*':
  54. tok = newToken(token.ASTERISK, l.ch)
  55. case '<':
  56. tok = newToken(token.LT, l.ch)
  57. case '>':
  58. tok = newToken(token.GT, l.ch)
  59. case ';':
  60. tok = newToken(token.SEMICOLON, l.ch)
  61. case ',':
  62. tok = newToken(token.COMMA, l.ch)
  63. case '{':
  64. tok = newToken(token.LBRACE, l.ch)
  65. case '}':
  66. tok = newToken(token.RBRACE, l.ch)
  67. case '(':
  68. tok = newToken(token.LPAREN, l.ch)
  69. case ')':
  70. tok = newToken(token.RPAREN, l.ch)
  71. case 0:
  72. tok.Literal = ""
  73. tok.Type = token.EOF
  74. default:
  75. if isLetter(l.ch) {
  76. tok.Literal = l.readIdentifier()
  77. tok.Type = token.LookupIdent(tok.Literal)
  78. // We already read the next char, so avoid the final readChar().
  79. return tok
  80. } else if isDigit(l.ch) {
  81. tok.Type = token.INT
  82. tok.Literal = l.readNumber()
  83. // Ditto.
  84. return tok
  85. } else {
  86. tok = newToken(token.ILLEGAL, l.ch)
  87. }
  88. }
  89. l.readChar()
  90. return tok
  91. }
  92. func (l *Lexer) skipWhitespace() {
  93. for l.ch == ' ' ||
  94. l.ch == '\r' ||
  95. l.ch == '\t' ||
  96. l.ch == '\n' {
  97. l.readChar()
  98. }
  99. }
  100. // Give us the next character and advance our position in the input string.
  101. func (l *Lexer) readChar() {
  102. if l.readPosition >= len(l.input) {
  103. l.ch = 0
  104. } else {
  105. l.ch = l.input[l.readPosition]
  106. }
  107. l.position = l.readPosition
  108. l.readPosition++
  109. }
  110. func (l *Lexer) peekChar() byte {
  111. if l.readPosition >= len(l.input) {
  112. return 0
  113. }
  114. return l.input[l.readPosition]
  115. }
  116. func (l *Lexer) readIdentifier() string {
  117. position := l.position
  118. for isLetter(l.ch) {
  119. l.readChar()
  120. }
  121. return l.input[position:l.position]
  122. }
  123. func (l *Lexer) readNumber() string {
  124. position := l.position
  125. for isDigit(l.ch) {
  126. l.readChar()
  127. }
  128. return l.input[position:l.position]
  129. }
  130. func isLetter(ch byte) bool {
  131. return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
  132. }
  133. func isDigit(ch byte) bool {
  134. return '0' <= ch && ch <= '9'
  135. }
  136. func newToken(tokenType token.TokenType, ch byte) token.Token {
  137. return token.Token{Type: tokenType, Literal: string(ch)}
  138. }