feat(lexer): added basic lexer

author: Michael Tews <git@tews.dev> 2024-06-19 05:14:33 +0200
committer: Michael Tews <michael@tews.dev> 2026-04-12 11:11:01 +0200
commit: 4cafa40257ff10b65ec905fb0db799c20f4058b4 (patch)
tree: 4776b2224022aff50606bb04ba5a78cd969b85ef
parent: 4db0592135bea51950110c1abbf223e66da0a4e8 (diff)
1 file changed, 92 insertions, 1 deletion
diff --git a/lexer/lexer.go b/lexer/lexer.go
index 2bd1c4e..92722f6 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,5 +1,7 @@
 package lexer
 
+import "github.com/mewsen/interpreter/token"
+
 type Lexer struct {
 	input        string
 	position     int  // current position in input (points to current char)
@@ -8,6 +10,95 @@ type Lexer struct {
 }
 
 func New(input string) Lexer {
-	l := Lexer{}
+	l := Lexer{input: input} // keep the source text on the lexer so readChar can index into it
+	l.readChar()             // load the first byte into l.ch before any NextToken call
 	return l
 }
+
+func (l *Lexer) NextToken() token.Token { // NextToken skips whitespace, then returns the token starting at l.ch and advances past it.
+	var tok token.Token
+
+	l.skipWhitespace()
+
+	switch l.ch {
+	case '=':
+		tok = newToken(token.ASSIGN, l.ch)
+	case ';':
+		tok = newToken(token.SEMICOLON, l.ch)
+	case '(':
+		tok = newToken(token.LPAREN, l.ch)
+	case ')':
+		tok = newToken(token.RPAREN, l.ch)
+	case '{':
+		tok = newToken(token.LBRACE, l.ch)
+	case '}':
+		tok = newToken(token.RBRACE, l.ch)
+	case ',':
+		tok = newToken(token.COMMA, l.ch)
+	case '+':
+		tok = newToken(token.PLUS, l.ch)
+	case 0: // readChar stores 0 in l.ch once the input is exhausted
+		tok.Literal = ""
+		tok.Type = token.EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = token.LookupIdentifier(tok.Literal)
+			return tok // readIdentifier already moved l.ch past the identifier, so the readChar below must be skipped
+		} else if isDigit(l.ch) {
+			tok.Type = token.INT
+			tok.Literal = l.readNumber()
+			return tok // readNumber already moved l.ch past the digits, so the readChar below must be skipped
+		} else {
+			tok = newToken(token.ILLEGAL, l.ch) // any byte not handled above is reported as ILLEGAL
+		}
+	}
+
+	l.readChar() // step past the single-byte token; at end of input this just leaves l.ch at 0
+	return tok
+}
+
+func (l *Lexer) readNumber() string { // readNumber consumes the run of ASCII digits starting at l.ch and returns it.
+	position := l.position
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+
+	return l.input[position:l.position] // l.position now indexes the first non-digit byte (or len(input) at the end)
+}
+
+func isDigit(ch byte) bool { // isDigit reports whether ch is an ASCII decimal digit '0'..'9'.
+	return '0' <= ch && ch <= '9'
+}
+
+func (l *Lexer) skipWhitespace() { // skipWhitespace advances while l.ch is a space, tab, newline or carriage return.
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+func (l *Lexer) readIdentifier() string { // readIdentifier consumes the run of isLetter bytes starting at l.ch and returns it.
+	position := l.position
+	for isLetter(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position] // l.position now indexes the first byte that is not a letter
+}
+
+func isLetter(ch byte) bool { // isLetter reports whether ch is an ASCII letter or '_'; digits do not count, so readIdentifier stops at them.
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+func (l *Lexer) readChar() { // readChar loads the byte at l.readPosition into l.ch and advances both position fields by one.
+	if l.readPosition >= len(l.input) {
+		l.ch = 0 // 0 marks end of input; NextToken turns it into token.EOF
+	} else {
+		l.ch = l.input[l.readPosition]
+	}
+	l.position = l.readPosition // position becomes the index of the byte now held in l.ch
+	l.readPosition++
+}
+
+func newToken(tokenType token.TokenType, ch byte) token.Token { // newToken builds a Token of tokenType whose Literal is derived from the single byte ch.
+	return token.Token{Type: tokenType, Literal: string(ch)} // NOTE(review): string(ch) yields the UTF-8 encoding of code point ch, so a byte >= 0x80 becomes a two-byte Literal
+}
author	Michael Tews <git@tews.dev>	2024-06-19 05:14:33 +0200
committer	Michael Tews <michael@tews.dev>	2026-04-12 11:11:01 +0200
commit	4cafa40257ff10b65ec905fb0db799c20f4058b4 (patch)
tree	4776b2224022aff50606bb04ba5a78cd969b85ef
parent	4db0592135bea51950110c1abbf223e66da0a4e8 (diff)