blob: d1f55d5465288cf8eecfdafa4df7bafbd210144c [file] [log] [blame]
package lexer
import (
"fmt"
"matheval/token"
)
// Tokenize converts an input string into a slice of tokens.
// It recognizes numbers (digits with at most one dot), identifiers
// (letter or underscore followed by letters/digits), single-character
// operators, and always ends the stream with an EOF token.
// Returns an error if the input contains invalid characters.
func Tokenize(input string) ([]token.Token, error) {
	var tokens []token.Token
	i := 0
	for i < len(input) {
		ch := input[i]
		// Skip whitespace.
		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
			i++
			continue
		}
		// Number: digits and at most one dot. A dot run with no digits
		// (e.g. a bare ".") is not a valid number and is rejected.
		if isDigit(ch) || ch == '.' {
			start := i
			hasDot := false
			hasDigit := false
			for i < len(input) && (isDigit(input[i]) || input[i] == '.') {
				if input[i] == '.' {
					if hasDot {
						break // second dot ends this number
					}
					hasDot = true
				} else {
					hasDigit = true
				}
				i++
			}
			if !hasDigit {
				// BUG FIX: previously a lone "." was emitted as a Number
				// token with literal "."; report it as invalid instead.
				return nil, fmt.Errorf("unexpected character %q at position %d", ".", start)
			}
			tokens = append(tokens, token.Token{
				Type:    token.Number,
				Literal: input[start:i],
				Pos:     start,
			})
			continue
		}
		// Identifier: starts with letter, continues with letters/digits.
		if isLetter(ch) {
			start := i
			for i < len(input) && (isLetter(input[i]) || isDigit(input[i])) {
				i++
			}
			tokens = append(tokens, token.Token{
				Type:    token.Ident,
				Literal: input[start:i],
				Pos:     start,
			})
			continue
		}
		// Single-character tokens.
		var typ token.Type
		switch ch {
		case '+':
			typ = token.Plus
		case '-':
			typ = token.Minus
		case '*':
			typ = token.Star
		case '/':
			typ = token.Slash
		case '(':
			typ = token.LParen
		case ')':
			typ = token.RParen
		case ',':
			typ = token.Comma
		case '=':
			typ = token.Equals
		default:
			return nil, fmt.Errorf("unexpected character %q at position %d", string(ch), i)
		}
		tokens = append(tokens, token.Token{
			Type:    typ,
			Literal: string(ch),
			Pos:     i,
		})
		i++
	}
	// Always append EOF so parsers have a definite end marker.
	tokens = append(tokens, token.Token{
		Type:    token.EOF,
		Literal: "",
		Pos:     i,
	})
	return tokens, nil
}
// isDigit reports whether ch is an ASCII decimal digit ('0'-'9').
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
// isLetter reports whether ch is an ASCII letter or underscore —
// the characters permitted in identifiers.
func isLetter(ch byte) bool {
	if ch == '_' {
		return true
	}
	folded := ch | 0x20 // fold ASCII upper case to lower case
	return 'a' <= folded && folded <= 'z'
}