214 lines
4.3 KiB
Go
214 lines
4.3 KiB
Go
package internal
|
|
|
|
import (
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// TokenType identifies the kind of a Token produced by the Lexer.
type TokenType int

// Token kinds. The values are consecutive integers starting at zero, in
// declaration order.
const (
	// TokText is literal text; the lexer also uses it for a lone "}" and for
	// a newline.
	TokText = TokenType(iota)
	// TokParamStart marks the opener "#{".
	TokParamStart
	// TokRawStart marks the opener "${".
	TokRawStart
	// TokIfStart marks the directive opener "@if(".
	TokIfStart
	// TokForStart marks the directive opener "@for(".
	TokForStart
	// TokTplStart marks the directive opener `@tpl("`.
	TokTplStart
	// TokIncludeStart marks the directive opener `@include("`.
	TokIncludeStart
	// TokNamespaceStart marks the directive opener `@namespace("`.
	TokNamespaceStart
	// TokUseStart marks the directive opener `@use("`.
	TokUseStart
	// TokElse marks a "}" followed by optional spaces/tabs and the word "else".
	TokElse
	// TokComment is a '#' comment running to the end of its line.
	TokComment
	// TokEOF is the final token of every token stream.
	TokEOF
)
|
|
|
|
type Token struct {
|
|
Type TokenType
|
|
Value string
|
|
Pos Pos
|
|
}
|
|
|
|
// Lexer holds the scanning state used to split template source into Tokens.
type Lexer struct {
	// input is the complete source as runes, so pos counts characters rather
	// than bytes.
	input []rune

	// pos is the index in input of the next unread rune; line and col track
	// the human-readable position of that rune.
	pos, line, col int
}
|
|
|
|
func NewLexer(input string) *Lexer {
|
|
return &Lexer{
|
|
input: []rune(input),
|
|
line: 1,
|
|
col: 1,
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) Tokenize() ([]Token, error) {
|
|
var tokens []Token
|
|
|
|
for l.pos < len(l.input) {
|
|
ch := l.input[l.pos]
|
|
|
|
if ch == '#' {
|
|
if l.peek(1) == '{' {
|
|
tokens = append(tokens, Token{Type: TokParamStart, Value: "#{", Pos: l.curPos()})
|
|
l.advance()
|
|
l.advance()
|
|
continue
|
|
}
|
|
tokens = append(tokens, l.readComment())
|
|
continue
|
|
}
|
|
|
|
if ch == '$' && l.peek(1) == '{' {
|
|
tokens = append(tokens, Token{Type: TokRawStart, Value: "${", Pos: l.curPos()})
|
|
l.advance()
|
|
l.advance()
|
|
continue
|
|
}
|
|
|
|
if ch == '@' {
|
|
if tok, ok := l.tryDirective(); ok {
|
|
tokens = append(tokens, tok)
|
|
continue
|
|
}
|
|
}
|
|
|
|
if ch == '}' {
|
|
// Skip spaces after '}' to check for "else"
|
|
spaceOffset := 1
|
|
for l.peek(spaceOffset) == ' ' || l.peek(spaceOffset) == '\t' {
|
|
spaceOffset++
|
|
}
|
|
if l.peekWord(spaceOffset, "else") {
|
|
pos := l.curPos()
|
|
l.advance() // consume '}'
|
|
l.advanceN(spaceOffset - 1) // consume spaces
|
|
l.advanceN(4) // consume "else"
|
|
tokens = append(tokens, Token{Type: TokElse, Value: "} else", Pos: pos})
|
|
continue
|
|
}
|
|
l.advance()
|
|
// Pos stores the end position (after '}'), consistent with other TokText tokens
|
|
tokens = append(tokens, Token{Type: TokText, Value: "}", Pos: l.curPos()})
|
|
continue
|
|
}
|
|
|
|
if ch == '\n' {
|
|
l.advance()
|
|
// Pos stores the end position (after '\n'), consistent with other TokText tokens
|
|
tokens = append(tokens, Token{Type: TokText, Value: "\n", Pos: l.curPos()})
|
|
continue
|
|
}
|
|
|
|
// Regular text: scan until special character
|
|
start := l.pos
|
|
for l.pos < len(l.input) {
|
|
c := l.input[l.pos]
|
|
if c == '#' || c == '$' || c == '@' || c == '}' || c == '\n' {
|
|
break
|
|
}
|
|
l.advance()
|
|
}
|
|
if l.pos > start {
|
|
tokens = append(tokens, Token{Type: TokText, Value: string(l.input[start:l.pos]), Pos: Pos{Line: l.line, Col: l.col}})
|
|
}
|
|
}
|
|
|
|
tokens = append(tokens, Token{Type: TokEOF, Pos: Pos{Line: l.line, Col: l.col}})
|
|
return tokens, nil
|
|
}
|
|
|
|
func (l *Lexer) curPos() Pos {
|
|
return Pos{Line: l.line, Col: l.col}
|
|
}
|
|
|
|
func (l *Lexer) advance() {
|
|
if l.pos < len(l.input) {
|
|
if l.input[l.pos] == '\n' {
|
|
l.line++
|
|
l.col = 1
|
|
} else {
|
|
l.col++
|
|
}
|
|
l.pos++
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) advanceN(n int) {
|
|
for range n {
|
|
l.advance()
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) peek(offset int) rune {
|
|
idx := l.pos + offset
|
|
if idx < len(l.input) {
|
|
return l.input[idx]
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (l *Lexer) peekWord(offset int, word string) bool {
|
|
runes := []rune(word)
|
|
n := len(runes)
|
|
for i := range n {
|
|
if l.peek(offset+i) != runes[i] {
|
|
return false
|
|
}
|
|
}
|
|
after := l.peek(offset + n)
|
|
return after == 0 || after == ' ' || after == '\n' || after == '\t' || after == '{' || after == '}'
|
|
}
|
|
|
|
func (l *Lexer) tryDirective() (Token, bool) {
|
|
type directive struct {
|
|
prefix []rune
|
|
ttype TokenType
|
|
skip int
|
|
}
|
|
|
|
directives := []directive{
|
|
{[]rune("@if("), TokIfStart, 4},
|
|
{[]rune("@for("), TokForStart, 5},
|
|
{[]rune("@tpl(\""), TokTplStart, 5},
|
|
{[]rune("@include(\""), TokIncludeStart, 10},
|
|
{[]rune("@namespace(\""), TokNamespaceStart, 12},
|
|
{[]rune("@use(\""), TokUseStart, 6},
|
|
}
|
|
|
|
for _, d := range directives {
|
|
if l.matchRunes(d.prefix) {
|
|
pos := l.curPos()
|
|
val := string(d.prefix)
|
|
l.advanceN(d.skip)
|
|
return Token{Type: d.ttype, Value: val, Pos: pos}, true
|
|
}
|
|
}
|
|
|
|
return Token{}, false
|
|
}
|
|
|
|
func (l *Lexer) matchRunes(runes []rune) bool {
|
|
for i, r := range runes {
|
|
if l.peek(i) != r {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (l *Lexer) readComment() Token {
|
|
pos := l.curPos()
|
|
l.advance() // #
|
|
start := l.pos
|
|
for l.pos < len(l.input) && l.input[l.pos] != '\n' {
|
|
l.advance()
|
|
}
|
|
// consume trailing newline so the comment line disappears
|
|
if l.pos < len(l.input) && l.input[l.pos] == '\n' {
|
|
l.advance()
|
|
}
|
|
return Token{Type: TokComment, Value: strings.TrimRightFunc(string(l.input[start:l.pos]), unicode.IsSpace), Pos: pos}
|
|
}
|