Add lexer: Tokenize function with full test coverage
Implements lexer.Tokenize(input string) ([]token.Token, error)
- Skips whitespace
- Parses integer and decimal numbers (including leading dot e.g. .5)
- Handles all operators: + - * /
- Handles parentheses: ( )
- Appends EOF token
- Returns error on invalid characters with position info
- 12 unit tests covering: empty, whitespace-only, integers, decimals,
leading-dot numbers, operators, parens, full expressions, no-space
expressions, invalid chars, multiple decimals (1.2.3)
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..0914b72
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,83 @@
+package lexer
+
+import (
+ "fmt"
+ "matheval/token"
+)
+
+// Tokenize converts an input string into a slice of tokens.
+// Returns an error if the input contains invalid characters.
+func Tokenize(input string) ([]token.Token, error) {
+ var tokens []token.Token
+ i := 0
+
+ for i < len(input) {
+ ch := input[i]
+
+ // Skip whitespace.
+ if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
+ i++
+ continue
+ }
+
+ // Number: digits and dots.
+ if isDigit(ch) || ch == '.' {
+ start := i
+ hasDot := false
+ for i < len(input) && (isDigit(input[i]) || input[i] == '.') {
+ if input[i] == '.' {
+ if hasDot {
+ break // second dot ends this number
+ }
+ hasDot = true
+ }
+ i++
+ }
+ tokens = append(tokens, token.Token{
+ Type: token.Number,
+ Literal: input[start:i],
+ Pos: start,
+ })
+ continue
+ }
+
+ // Single-character tokens.
+ var typ token.Type
+ switch ch {
+ case '+':
+ typ = token.Plus
+ case '-':
+ typ = token.Minus
+ case '*':
+ typ = token.Star
+ case '/':
+ typ = token.Slash
+ case '(':
+ typ = token.LParen
+ case ')':
+ typ = token.RParen
+ default:
+ return nil, fmt.Errorf("unexpected character %q at position %d", string(ch), i)
+ }
+
+ tokens = append(tokens, token.Token{
+ Type: typ,
+ Literal: string(ch),
+ Pos: i,
+ })
+ i++
+ }
+
+ // Always append EOF.
+ tokens = append(tokens, token.Token{
+ Type: token.EOF,
+ Literal: "",
+ Pos: i,
+ })
+
+ return tokens, nil
+}
+
+func isDigit(ch byte) bool {
+ return ch >= '0' && ch <= '9'
+}
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
new file mode 100644
index 0000000..94a1627
--- /dev/null
+++ b/lexer/lexer_test.go
@@ -0,0 +1,169 @@
+package lexer
+
+import (
+	"matheval/token"
+	"testing"
+)
+
+func TestTokenizeEmpty(t *testing.T) {
+	tokens, err := Tokenize("")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(tokens) != 1 || tokens[0].Type != token.EOF {
+		t.Fatalf("expected single EOF token, got %v", tokens)
+	}
+}
+
+func TestTokenizeWhitespaceOnly(t *testing.T) {
+	tokens, err := Tokenize(" \t\n ")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(tokens) != 1 || tokens[0].Type != token.EOF {
+		t.Fatalf("expected single EOF token, got %v", tokens)
+	}
+}
+
+func TestTokenizeSingleNumber(t *testing.T) {
+	tokens, err := Tokenize("42")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Number, Literal: "42", Pos: 0},
+		{Type: token.EOF, Literal: "", Pos: 2},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeDecimalNumber(t *testing.T) {
+	tokens, err := Tokenize("3.14")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Number, Literal: "3.14", Pos: 0},
+		{Type: token.EOF, Literal: "", Pos: 4},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeLeadingDotNumber(t *testing.T) {
+	tokens, err := Tokenize(".5")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Number, Literal: ".5", Pos: 0},
+		{Type: token.EOF, Literal: "", Pos: 2},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeOperators(t *testing.T) {
+	tokens, err := Tokenize("+-*/")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Plus, Literal: "+", Pos: 0},
+		{Type: token.Minus, Literal: "-", Pos: 1},
+		{Type: token.Star, Literal: "*", Pos: 2},
+		{Type: token.Slash, Literal: "/", Pos: 3},
+		{Type: token.EOF, Literal: "", Pos: 4},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeParens(t *testing.T) {
+	tokens, err := Tokenize("()")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.LParen, Literal: "(", Pos: 0},
+		{Type: token.RParen, Literal: ")", Pos: 1},
+		{Type: token.EOF, Literal: "", Pos: 2},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeFullExpression(t *testing.T) {
+	tokens, err := Tokenize("(1 + 2.5) * 3")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.LParen, Literal: "(", Pos: 0},
+		{Type: token.Number, Literal: "1", Pos: 1},
+		{Type: token.Plus, Literal: "+", Pos: 3},
+		{Type: token.Number, Literal: "2.5", Pos: 5},
+		{Type: token.RParen, Literal: ")", Pos: 8},
+		{Type: token.Star, Literal: "*", Pos: 10},
+		{Type: token.Number, Literal: "3", Pos: 12},
+		{Type: token.EOF, Literal: "", Pos: 13},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeNoSpaces(t *testing.T) {
+	tokens, err := Tokenize("1+2")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Number, Literal: "1", Pos: 0},
+		{Type: token.Plus, Literal: "+", Pos: 1},
+		{Type: token.Number, Literal: "2", Pos: 2},
+		{Type: token.EOF, Literal: "", Pos: 3},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+func TestTokenizeInvalidCharacter(t *testing.T) {
+	_, err := Tokenize("1 + @")
+	if err == nil {
+		t.Fatal("expected error for invalid character")
+	}
+}
+
+func TestTokenizeMultipleInvalidCharacters(t *testing.T) {
+	_, err := Tokenize("1 & 2")
+	if err == nil {
+		t.Fatal("expected error for invalid character")
+	}
+}
+
+func TestTokenizeMultipleDecimals(t *testing.T) {
+	// "1.2.3" is not an error: a second dot ends the first number, so the lexer emits "1.2" (pos 0) then ".3" (pos 3).
+	tokens, err := Tokenize("1.2.3")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	expect := []token.Token{
+		{Type: token.Number, Literal: "1.2", Pos: 0},
+		{Type: token.Number, Literal: ".3", Pos: 3},
+		{Type: token.EOF, Literal: "", Pos: 5},
+	}
+	assertTokens(t, expect, tokens)
+}
+
+// assertTokens fails the test when got differs from want in length or in any token's Type, Literal, or Pos.
+func assertTokens(t *testing.T, want, got []token.Token) {
+	t.Helper()
+	if len(want) != len(got) {
+		t.Fatalf("token count: want %d, got %d\nwant: %v\ngot: %v", len(want), len(got), want, got)
+	}
+	for i := range want {
+		if want[i].Type != got[i].Type {
+			t.Errorf("token[%d].Type: want %v, got %v", i, want[i].Type, got[i].Type)
+		}
+		if want[i].Literal != got[i].Literal {
+			t.Errorf("token[%d].Literal: want %q, got %q", i, want[i].Literal, got[i].Literal)
+		}
+		if want[i].Pos != got[i].Pos {
+			t.Errorf("token[%d].Pos: want %d, got %d", i, want[i].Pos, got[i].Pos)
+		}
+	}
+}