package internal

import (
	"strings"
	"unicode"
)

// TokenType identifies the kind of a Token produced by Lexer.
type TokenType int

// Token kinds emitted by Lexer.Tokenize.
const (
	// TokText is literal text, a lone "}" or a single "\n".
	TokText TokenType = iota
	// TokParamStart is the "#{" marker.
	TokParamStart
	// TokRawStart is the "${" marker.
	TokRawStart
	// TokIfStart is the "@if(" directive prefix.
	TokIfStart
	// TokForStart is the "@for(" directive prefix.
	TokForStart
	// TokTplStart is the `@tpl("` directive prefix.
	TokTplStart
	// TokIncludeStart is the `@include("` directive prefix.
	TokIncludeStart
	// TokNamespaceStart is the `@namespace("` directive prefix.
	TokNamespaceStart
	// TokUseStart is the `@use("` directive prefix.
	TokUseStart
	// TokElse is a "}" followed by optional spaces/tabs and the word "else".
	TokElse
	// TokComment is a "#" comment running to the end of the line.
	TokComment
	// TokEOF marks the end of the input.
	TokEOF
)

// Token is a single lexical element: its kind, its text and a source
// position. Marker tokens record the position where they start; TokText
// tokens record the position just past their last rune.
type Token struct {
	Type  TokenType
	Value string
	Pos   Pos
}

// Lexer splits template input into tokens. It keeps the input as runes, the
// index of the next unread rune (pos) and the 1-based line and column of
// that rune.
type Lexer struct {
	input []rune
	pos   int
	line  int
	col   int
}

// NewLexer returns a Lexer positioned at line 1, column 1 of input.
func NewLexer(input string) *Lexer {
	return &Lexer{
		input: []rune(input),
		line:  1,
		col:   1,
	}
}

// Tokenize scans the remaining input and returns the tokens found. The
// returned slice always ends with a TokEOF token and the error is currently
// always nil.
//
// "#{" and "${" become marker tokens, any other "#" starts a comment, "@"
// is tried as a directive, "}" becomes either TokElse or a one-rune text
// token, and "\n" becomes a one-rune text token. Everything else is
// collected into TokText runs.
func (l *Lexer) Tokenize() ([]Token, error) {
	var tokens []Token
	for l.pos < len(l.input) {
		ch := l.input[l.pos]
		switch {
		case ch == '#' && l.peek(1) == '{':
			tokens = append(tokens, Token{Type: TokParamStart, Value: "#{", Pos: l.curPos()})
			l.advanceN(2)
			continue
		case ch == '#':
			tokens = append(tokens, l.readComment())
			continue
		case ch == '$' && l.peek(1) == '{':
			tokens = append(tokens, Token{Type: TokRawStart, Value: "${", Pos: l.curPos()})
			l.advanceN(2)
			continue
		case ch == '@':
			if tok, ok := l.tryDirective(); ok {
				tokens = append(tokens, tok)
				continue
			}
			// Not a directive: the '@' is plain text, handled below.
		case ch == '}':
			// Look past spaces and tabs after '}' for the word "else".
			spaceOffset := 1
			for l.peek(spaceOffset) == ' ' || l.peek(spaceOffset) == '\t' {
				spaceOffset++
			}
			if l.peekWord(spaceOffset, "else") {
				pos := l.curPos()
				// Consume '}', the blanks after it, and the four runes of "else".
				l.advanceN(spaceOffset + 4)
				tokens = append(tokens, Token{Type: TokElse, Value: "} else", Pos: pos})
				continue
			}
			l.advance()
			// Pos is the end position (after '}'), like every other TokText.
			tokens = append(tokens, Token{Type: TokText, Value: "}", Pos: l.curPos()})
			continue
		case ch == '\n':
			l.advance()
			// Pos is the end position (after '\n'), like every other TokText.
			tokens = append(tokens, Token{Type: TokText, Value: "\n", Pos: l.curPos()})
			continue
		}

		// Plain text. The rune at l.pos is known not to start a token here,
		// even if it is a '$' without '{' or an '@' that is not a directive,
		// so it is consumed unconditionally. Without that, such a rune would
		// stop the scan below before any progress was made and this loop
		// would never terminate.
		start := l.pos
		l.advance()
		for l.pos < len(l.input) {
			c := l.input[l.pos]
			if c == '#' || c == '$' || c == '@' || c == '}' || c == '\n' {
				break
			}
			l.advance()
		}
		tokens = append(tokens, Token{Type: TokText, Value: string(l.input[start:l.pos]), Pos: l.curPos()})
	}
	tokens = append(tokens, Token{Type: TokEOF, Pos: l.curPos()})
	return tokens, nil
}

// curPos returns the line and column of the next unread rune.
func (l *Lexer) curPos() Pos {
	return Pos{Line: l.line, Col: l.col}
}

// advance consumes one rune, moving to the start of the next line after a
// '\n' and one column to the right otherwise. It does nothing at the end of
// the input.
func (l *Lexer) advance() {
	if l.pos >= len(l.input) {
		return
	}
	if l.input[l.pos] == '\n' {
		l.line++
		l.col = 1
	} else {
		l.col++
	}
	l.pos++
}

// advanceN consumes up to n runes; it stops silently at the end of the input.
func (l *Lexer) advanceN(n int) {
	for range n {
		l.advance()
	}
}

// peek returns the rune offset positions past the current one without
// consuming anything, or 0 when that index lies outside the input.
func (l *Lexer) peek(offset int) rune {
	idx := l.pos + offset
	if idx >= 0 && idx < len(l.input) {
		return l.input[idx]
	}
	return 0
}

// peekWord reports whether word appears offset runes past the current
// position and is followed by the end of the input, a space, a tab, a
// newline, '{' or '}'.
func (l *Lexer) peekWord(offset int, word string) bool {
	runes := []rune(word)
	for i, r := range runes {
		if l.peek(offset+i) != r {
			return false
		}
	}
	switch l.peek(offset + len(runes)) {
	case 0, ' ', '\n', '\t', '{', '}':
		return true
	}
	return false
}

// tryDirective checks whether a known "@" directive prefix starts at the
// current position. On a match it consumes the configured number of runes
// and returns the directive token, whose Value is the full prefix and whose
// Pos is where the directive starts. Otherwise it consumes nothing and
// returns false.
func (l *Lexer) tryDirective() (Token, bool) {
	type directive struct {
		prefix []rune
		ttype  TokenType
		skip   int
	}
	directives := []directive{
		{[]rune("@if("), TokIfStart, 4},
		{[]rune("@for("), TokForStart, 5},
		// NOTE(review): @tpl skips 5 runes although its prefix has 6, so the
		// opening quote stays in the input, unlike @include, @namespace and
		// @use below. Confirm this is what the parser expects.
		{[]rune("@tpl(\""), TokTplStart, 5},
		{[]rune("@include(\""), TokIncludeStart, 10},
		{[]rune("@namespace(\""), TokNamespaceStart, 12},
		{[]rune("@use(\""), TokUseStart, 6},
	}
	for _, d := range directives {
		if !l.matchRunes(d.prefix) {
			continue
		}
		pos := l.curPos()
		l.advanceN(d.skip)
		return Token{Type: d.ttype, Value: string(d.prefix), Pos: pos}, true
	}
	return Token{}, false
}

// matchRunes reports whether the input at the current position starts with
// runes.
func (l *Lexer) matchRunes(runes []rune) bool {
	for i, r := range runes {
		if l.peek(i) != r {
			return false
		}
	}
	return true
}

// readComment consumes a "#" comment through the end of its line, including
// the newline, and returns a TokComment. Value is the text after '#' with
// trailing whitespace removed; Pos is the position of the '#'.
func (l *Lexer) readComment() Token {
	pos := l.curPos()
	l.advance() // skip '#'
	start := l.pos
	for l.pos < len(l.input) && l.input[l.pos] != '\n' {
		l.advance()
	}
	// Swallow the trailing newline so the comment line disappears entirely.
	if l.pos < len(l.input) && l.input[l.pos] == '\n' {
		l.advance()
	}
	text := strings.TrimRightFunc(string(l.input[start:l.pos]), unicode.IsSpace)
	return Token{Type: TokComment, Value: text, Pos: pos}
}