blob: d1f55d5465288cf8eecfdafa4df7bafbd210144c [file] [log] [blame]
Sketch🕴️cdbb1892026-02-28 19:10:35 +04001package lexer
2
import (
	"fmt"
	"unicode/utf8"

	"matheval/token"
)
7
8// Tokenize converts an input string into a slice of tokens.
9// Returns an error if the input contains invalid characters.
10func Tokenize(input string) ([]token.Token, error) {
11 var tokens []token.Token
12 i := 0
13
14 for i < len(input) {
15 ch := input[i]
16
17 // Skip whitespace.
18 if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
19 i++
20 continue
21 }
22
23 // Number: digits and dots.
24 if isDigit(ch) || ch == '.' {
25 start := i
26 hasDot := false
27 for i < len(input) && (isDigit(input[i]) || input[i] == '.') {
28 if input[i] == '.' {
29 if hasDot {
30 break // second dot ends this number
31 }
32 hasDot = true
33 }
34 i++
35 }
36 tokens = append(tokens, token.Token{
37 Type: token.Number,
38 Literal: input[start:i],
39 Pos: start,
40 })
41 continue
42 }
43
Sketch🕴️dc6d8f62026-02-28 20:45:13 +040044 // Identifier: starts with letter, continues with letters/digits.
45 if isLetter(ch) {
46 start := i
47 for i < len(input) && (isLetter(input[i]) || isDigit(input[i])) {
48 i++
49 }
50 tokens = append(tokens, token.Token{
51 Type: token.Ident,
52 Literal: input[start:i],
53 Pos: start,
54 })
55 continue
56 }
57
Sketch🕴️cdbb1892026-02-28 19:10:35 +040058 // Single-character tokens.
59 var typ token.Type
60 switch ch {
61 case '+':
62 typ = token.Plus
63 case '-':
64 typ = token.Minus
65 case '*':
66 typ = token.Star
67 case '/':
68 typ = token.Slash
69 case '(':
70 typ = token.LParen
71 case ')':
72 typ = token.RParen
Sketch🕴️dc6d8f62026-02-28 20:45:13 +040073 case ',':
74 typ = token.Comma
75 case '=':
76 typ = token.Equals
Sketch🕴️cdbb1892026-02-28 19:10:35 +040077 default:
78 return nil, fmt.Errorf("unexpected character %q at position %d", string(ch), i)
79 }
80
81 tokens = append(tokens, token.Token{
82 Type: typ,
83 Literal: string(ch),
84 Pos: i,
85 })
86 i++
87 }
88
89 // Always append EOF.
90 tokens = append(tokens, token.Token{
91 Type: token.EOF,
92 Literal: "",
93 Pos: i,
94 })
95
96 return tokens, nil
97}
98
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	switch {
	case ch < '0':
		return false
	case ch > '9':
		return false
	default:
		return true
	}
}
Sketch🕴️dc6d8f62026-02-28 20:45:13 +0400102
// isLetter reports whether ch may appear in an identifier as a letter:
// an ASCII lowercase or uppercase letter, or an underscore.
func isLetter(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
105}