Files
u-tpl/internal/lexer.go
绝尘 861d58d718 新增: u-tpl SQL 模板引擎完整实现
- Lexer/Parser/Executor 三阶段架构
- #{param} 参数化 + ${raw} 原样替换 + 白名单安全策略
- @if/@for/@tpl/@include/@namespace 控制流
- 表达式引擎: 比较、逻辑、nil 检查、len() 内置函数
- 支持 ?/$1/:1 多数据库占位符风格
- 零依赖,纯 Go 标准库实现
2026-04-01 00:27:50 +08:00

212 lines
4.2 KiB
Go

package internal
import (
"strings"
"unicode"
)
// TokenType identifies the kind of a Token produced by the Lexer.
type TokenType int
// Token kinds. Values are assigned sequentially by iota, starting at 0 for
// TokText, so the declaration order below determines each numeric value.
const (
// TokText is literal text; a lone "}" and each "\n" are emitted as their own TokText.
TokText TokenType = iota
// TokParamStart is emitted for the two-rune marker "#{".
TokParamStart
// TokRawStart is emitted for the two-rune marker "${".
TokRawStart
// TokIfStart is emitted for the directive prefix "@if(".
TokIfStart
// TokForStart is emitted for the directive prefix "@for(".
TokForStart
// TokTplStart is emitted for the directive prefix `@tpl("`.
TokTplStart
// TokIncludeStart is emitted for the directive prefix `@include("`.
TokIncludeStart
// TokNamespaceStart is emitted for the directive prefix `@namespace("`.
TokNamespaceStart
// TokElse is emitted for "}" followed by optional spaces/tabs and the word "else".
TokElse
// TokComment is emitted for a "#" that is not followed by "{"; it covers the rest of the line.
TokComment
// TokEOF is appended once after the whole input has been scanned.
TokEOF
)
// Token is a single lexical unit produced by Lexer.Tokenize.
type Token struct {
// Type is the kind of the token.
Type TokenType
// Value is the token text: the matched marker for start/else tokens, the
// literal text for TokText, and the trimmed comment body for TokComment.
// It is left empty for TokEOF.
Value string
// Pos is the recorded source position. TokText and TokEOF tokens record the
// position just after the token; all other tokens record where they start.
Pos Pos
}
// Lexer scans template source into tokens while tracking the current line
// and column.
type Lexer struct {
// input is the source decoded into runes, so indexing is per character
// rather than per byte.
input []rune
// pos is the index into input of the next rune to be read.
pos int
// line is the current line number; NewLexer starts it at 1.
line int
// col is the current column number; NewLexer starts it at 1 and advance
// resets it to 1 after consuming a newline.
col int
}
// NewLexer returns a Lexer positioned at the first rune of input.
//
// The input is decoded into runes up front so that later indexing is per
// character. Line and column counting both start at 1.
func NewLexer(input string) *Lexer {
	lx := new(Lexer)
	lx.input = []rune(input)
	lx.line, lx.col = 1, 1
	return lx
}
// Tokenize scans the remaining input and returns the resulting tokens,
// always terminated by a single TokEOF token.
//
// Recognised markers:
//   - "#{"  -> TokParamStart; any other "#" starts a comment that runs to the
//     end of the line (TokComment).
//   - "${"  -> TokRawStart.
//   - "@..." -> a directive start token when tryDirective matches.
//   - "}" followed by optional spaces/tabs and the word "else" -> TokElse;
//     any other "}" is emitted as its own TokText.
//   - "\n" is emitted as its own TokText.
//
// Everything else is grouped into TokText runs. A "$" that is not followed by
// "{" (for example the "$1" placeholder) and an "@" that does not start a
// known directive (for example inside 'a@b.c') are ordinary text: they begin
// a new TokText run instead of being left unconsumed, which previously made
// the scan loop spin forever.
//
// TokText and TokEOF tokens record the position just after the token; all
// other tokens record their start position. The returned error is always nil
// in the current implementation.
func (l *Lexer) Tokenize() ([]Token, error) {
	var tokens []Token
	for l.pos < len(l.input) {
		switch l.input[l.pos] {
		case '#':
			if l.peek(1) != '{' {
				tokens = append(tokens, l.readComment())
				continue
			}
			tokens = append(tokens, Token{Type: TokParamStart, Value: "#{", Pos: l.curPos()})
			l.advanceN(2)
			continue

		case '$':
			if l.peek(1) == '{' {
				tokens = append(tokens, Token{Type: TokRawStart, Value: "${", Pos: l.curPos()})
				l.advanceN(2)
				continue
			}
			// A lone '$' falls through to the plain-text scan below.

		case '@':
			if tok, ok := l.tryDirective(); ok {
				tokens = append(tokens, tok)
				continue
			}
			// An '@' that is not a directive falls through to the plain-text scan.

		case '}':
			// Look past spaces/tabs after '}' to detect "} else".
			gap := 1
			for l.peek(gap) == ' ' || l.peek(gap) == '\t' {
				gap++
			}
			if l.peekWord(gap, "else") {
				pos := l.curPos()
				// Consume '}', the gap-1 blanks, and the 4 runes of "else".
				l.advanceN(gap + 4)
				tokens = append(tokens, Token{Type: TokElse, Value: "} else", Pos: pos})
				continue
			}
			l.advance()
			// Pos is the end position (after '}'), like every other TokText.
			tokens = append(tokens, Token{Type: TokText, Value: "}", Pos: l.curPos()})
			continue

		case '\n':
			l.advance()
			// Pos is the end position (after '\n'), like every other TokText.
			tokens = append(tokens, Token{Type: TokText, Value: "\n", Pos: l.curPos()})
			continue
		}

		// Plain text. The current rune is known not to start a special token,
		// so it is consumed unconditionally; this guarantees forward progress
		// even when it is a stray '$' or '@'.
		start := l.pos
		l.advance()
		for l.pos < len(l.input) {
			c := l.input[l.pos]
			if c == '#' || c == '$' || c == '@' || c == '}' || c == '\n' {
				break
			}
			l.advance()
		}
		tokens = append(tokens, Token{Type: TokText, Value: string(l.input[start:l.pos]), Pos: l.curPos()})
	}
	tokens = append(tokens, Token{Type: TokEOF, Pos: l.curPos()})
	return tokens, nil
}
// curPos reports the lexer's current line and column as a Pos.
func (l *Lexer) curPos() Pos {
	var here Pos
	here.Line = l.line
	here.Col = l.col
	return here
}
// advance consumes one rune and updates the line/column bookkeeping.
//
// Consuming a newline moves to column 1 of the next line; any other rune
// moves one column to the right. At end of input it does nothing.
func (l *Lexer) advance() {
	if l.pos >= len(l.input) {
		return
	}
	switch l.input[l.pos] {
	case '\n':
		l.line++
		l.col = 1
	default:
		l.col++
	}
	l.pos++
}
// advanceN consumes up to n runes by calling advance n times.
// A non-positive n consumes nothing.
func (l *Lexer) advanceN(n int) {
	for remaining := n; remaining > 0; remaining-- {
		l.advance()
	}
}
// peek returns the rune offset positions ahead of the current one without
// consuming anything. It returns 0 when that position is past the end of
// the input.
func (l *Lexer) peek(offset int) rune {
	target := l.pos + offset
	if target >= len(l.input) {
		return 0
	}
	return l.input[target]
}
// peekWord reports whether word appears starting offset runes ahead of the
// current position and is followed by a word boundary.
//
// A boundary is end of input (peek returns 0), a space, tab, newline, "{"
// or "}". Nothing is consumed.
func (l *Lexer) peekWord(offset int, word string) bool {
	at := offset
	for _, want := range word {
		if l.peek(at) != want {
			return false
		}
		at++
	}
	switch l.peek(at) {
	case 0, ' ', '\n', '\t', '{', '}':
		return true
	}
	return false
}
// tryDirective attempts to match one of the known "@" directive prefixes at
// the current position.
//
// On a match it consumes the configured number of runes and returns the
// directive's start token (Value is the full prefix, Pos is where the prefix
// began) together with true. Otherwise it consumes nothing and returns a
// zero Token and false. Prefixes are tried in table order.
func (l *Lexer) tryDirective() (Token, bool) {
	// NOTE(review): the `@tpl("` prefix is 6 runes long but only 5 are
	// consumed, so its opening quote stays in the input, whereas `@include("`
	// and `@namespace("` consume their quote. This may be deliberate for the
	// parser or an off-by-one — confirm against the parser before changing.
	table := []struct {
		text    string
		kind    TokenType
		consume int
	}{
		{"@if(", TokIfStart, 4},
		{"@for(", TokForStart, 5},
		{"@tpl(\"", TokTplStart, 5},
		{"@include(\"", TokIncludeStart, 10},
		{"@namespace(\"", TokNamespaceStart, 12},
	}
	for i := range table {
		entry := &table[i]
		if !l.matchRunes([]rune(entry.text)) {
			continue
		}
		tok := Token{Type: entry.kind, Value: entry.text, Pos: l.curPos()}
		l.advanceN(entry.consume)
		return tok, true
	}
	return Token{}, false
}
// matchRunes reports whether the input at the current position starts with
// the given rune sequence. It consumes nothing; an empty sequence always
// matches.
func (l *Lexer) matchRunes(runes []rune) bool {
	for i := 0; i < len(runes); i++ {
		if runes[i] != l.peek(i) {
			return false
		}
	}
	return true
}
// readComment consumes a "#" line comment and returns it as a TokComment.
//
// The lexer must be positioned on the "#". Everything up to and including
// the terminating newline (if any) is consumed, so the comment line leaves
// no trace in the token stream. The token's Value is the text after "#" with
// trailing whitespace removed; its Pos is the position of the "#".
func (l *Lexer) readComment() Token {
	begin := l.curPos()
	l.advance() // skip '#'
	bodyStart := l.pos
	for l.pos < len(l.input) {
		if l.input[l.pos] == '\n' {
			// Swallow the newline as part of the comment.
			l.advance()
			break
		}
		l.advance()
	}
	body := string(l.input[bodyStart:l.pos])
	return Token{
		Type:  TokComment,
		Value: strings.TrimRightFunc(body, unicode.IsSpace),
		Pos:   begin,
	}
}