diff options
| author | Michael Tews <git@tews.dev> | 2024-06-19 05:14:33 +0200 |
|---|---|---|
| committer | Michael Tews <michael@tews.dev> | 2026-04-12 11:11:01 +0200 |
| commit | 4cafa40257ff10b65ec905fb0db799c20f4058b4 (patch) | |
| tree | 4776b2224022aff50606bb04ba5a78cd969b85ef | |
| parent | 4db0592135bea51950110c1abbf223e66da0a4e8 (diff) | |
feat(lexer): added basic lexer
| -rw-r--r-- | lexer/lexer.go | 93 |
1 files changed, 92 insertions, 1 deletions
```diff
diff --git a/lexer/lexer.go b/lexer/lexer.go
index 2bd1c4e..92722f6 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,5 +1,7 @@
 package lexer
 
+import "github.com/mewsen/interpreter/token"
+
 type Lexer struct {
 	input        string
 	position     int // current position in input (points to current char)
@@ -8,6 +10,95 @@ type Lexer struct {
 }
 
 func New(input string) Lexer {
-	l := Lexer{}
+	l := Lexer{input: input}
+	l.readChar()
 	return l
 }
+
+func (l *Lexer) NextToken() token.Token {
+	var tok token.Token
+
+	l.skipWhitespace()
+
+	switch l.ch {
+	case '=':
+		tok = newToken(token.ASSIGN, l.ch)
+	case ';':
+		tok = newToken(token.SEMICOLON, l.ch)
+	case '(':
+		tok = newToken(token.LPAREN, l.ch)
+	case ')':
+		tok = newToken(token.RPAREN, l.ch)
+	case '{':
+		tok = newToken(token.LBRACE, l.ch)
+	case '}':
+		tok = newToken(token.RBRACE, l.ch)
+	case ',':
+		tok = newToken(token.COMMA, l.ch)
+	case '+':
+		tok = newToken(token.PLUS, l.ch)
+	case 0:
+		tok.Literal = ""
+		tok.Type = token.EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = token.LookupIdentifier(tok.Literal)
+			return tok
+		} else if isDigit(l.ch) {
+			tok.Type = token.INT
+			tok.Literal = l.readNumber()
+			return tok
+		} else {
+			tok = newToken(token.ILLEGAL, l.ch)
+		}
+	}
+
+	l.readChar()
+	return tok
+}
+
+func (l *Lexer) readNumber() string {
+	position := l.position
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+
+	return l.input[position:l.position]
+}
+
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
+
+func (l *Lexer) skipWhitespace() {
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+func (l *Lexer) readIdentifier() string {
+	position := l.position
+	for isLetter(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
+
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+func (l *Lexer) readChar() {
+	if l.readPosition >= len(l.input) {
+		l.ch = 0
+	} else {
+		l.ch = l.input[l.readPosition]
+	}
+	l.position = l.readPosition
+	l.readPosition++
+}
+
+func newToken(tokenType token.TokenType, ch byte) token.Token {
+	return token.Token{Type: tokenType, Literal: string(ch)}
+}
```
