/*
Package lexer only supports single-byte character sets like ASCII.

@TODO convert to Unicode / UTF-8.
*/
package lexer

import (
	"code.osinet.fr/fgm/waiig15/token"
)

// Lexer implements the lexing mechanism.
type Lexer struct {
	input        string
	position     int  // current position in input (points to current char)
	readPosition int  // current reading position in input (after current char)
	ch           byte // current char under examination
}

// New returns a new Lexer instance with the first character in the input
// already read.
func New(input string) *Lexer {
	l := &Lexer{input: input}
	l.readChar()
	return l
}

// NextToken advances in the input by one token, skipping all whitespace. It
// returns that token. In case of a lexing error it returns an ILLEGAL token.
func (l *Lexer) NextToken() token.Token {
	var tok token.Token

	l.skipWhitespace()

	switch l.ch {
	case '=':
		if l.peekChar() == '=' {
			ch := l.ch
			l.readChar()
			literal := string(ch) + string(l.ch)
			tok = token.Token{Type: token.EQ, Literal: literal}
		} else {
			tok = newToken(token.ASSIGN, l.ch)
		}
	case '+':
		tok = newToken(token.PLUS, l.ch)
	case '-':
		tok = newToken(token.MINUS, l.ch)
	case '!':
		if l.peekChar() == '=' {
			ch := l.ch
			l.readChar()
			literal := string(ch) + string(l.ch)
			tok = token.Token{Type: token.NOT_EQ, Literal: literal}
		} else {
			tok = newToken(token.BANG, l.ch)
		}
	case '/':
		tok = newToken(token.SLASH, l.ch)
	case '*':
		tok = newToken(token.ASTERISK, l.ch)
	case '<':
		tok = newToken(token.LT, l.ch)
	case '>':
		tok = newToken(token.GT, l.ch)
	case ';':
		tok = newToken(token.SEMICOLON, l.ch)
	case ',':
		tok = newToken(token.COMMA, l.ch)
	case '{':
		tok = newToken(token.LBRACE, l.ch)
	case '}':
		tok = newToken(token.RBRACE, l.ch)
	case '(':
		tok = newToken(token.LPAREN, l.ch)
	case ')':
		tok = newToken(token.RPAREN, l.ch)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF
	default:
		if isLetter(l.ch) {
			tok.Literal = l.readIdentifier()
			tok.Type = token.LookupIdent(tok.Literal)
			// We already read the next char, so avoid the final readChar().
			return tok
		} else if isDigit(l.ch) {
			tok.Type = token.INT
			tok.Literal = l.readNumber()
			// Ditto.
			return tok
		} else {
			tok = newToken(token.ILLEGAL, l.ch)
		}
	}

	l.readChar()
	return tok
}

// skipWhitespace consumes blanks (spaces, tabs, carriage returns, newlines)
// until a non-blank character is under examination.
func (l *Lexer) skipWhitespace() {
	for l.ch == ' ' ||
		l.ch == '\r' ||
		l.ch == '\t' ||
		l.ch == '\n' {
		l.readChar()
	}
}

// readChar gives us the next character and advances our position in the input
// string. At the end of the input, it sets the current char to 0.
func (l *Lexer) readChar() {
	if l.readPosition >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPosition]
	}
	l.position = l.readPosition
	l.readPosition++
}

// peekChar returns the next character without consuming it, or 0 at the end
// of the input.
func (l *Lexer) peekChar() byte {
	if l.readPosition >= len(l.input) {
		return 0
	}
	return l.input[l.readPosition]
}

// readIdentifier consumes a run of letters and returns it as a string.
func (l *Lexer) readIdentifier() string {
	position := l.position
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}

// readNumber consumes a run of digits and returns it as a string.
func (l *Lexer) readNumber() string {
	position := l.position
	for isDigit(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}

// isLetter reports whether ch may appear in an identifier. Note that '_'
// counts as a letter.
func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// isDigit reports whether ch is an ASCII digit.
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

// newToken builds a single-character token of the given type.
func newToken(tokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: tokenType, Literal: string(ch)}
}
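// A minimal usage sketch for this package (hypothetical caller code, kept in
// a comment so the file compiles unchanged; assumes the caller imports fmt,
// this lexer package, and its token package):
//
//	l := lexer.New("let five = 5;")
//	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
//		fmt.Printf("%+v\n", tok)
//	}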