/** * Copyright (c) 2011-2019, James Zhan 詹波 (jfinal@126.com). * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.jfinal.template.stat; import java.util.ArrayList; import java.util.List; /** * DKFF(Dynamic Key Feature Forward) Lexer */ class Lexer { static final char EOF = (char)-1; static final int TEXT_STATE_DIAGRAM = 999; char[] buf; int state = 0; int lexemeBegin = 0; int forward = 0; int beginRow = 1; int forwardRow = 1; TextToken previousTextToken = null; List tokens = new ArrayList(); String fileName; public Lexer(StringBuilder content, String fileName) { int len = content.length(); buf = new char[len + 1]; content.getChars(0, content.length(), buf, 0); buf[len] = EOF; this.fileName = fileName; } /** * 进入每个扫描方法之前 peek() 处于可用状态,不需要 next() * 每个扫描方法内部是否要 next() 移动,取决定具体情况 * 每个扫描方法成功返回前,将 forward 置于下一次扫描需要处理的地方 * 让下个扫描方法不必 next() * 紧靠 scanText() 之前的扫描方法在失败后必须保持住forward * 这是 scanText() 可以一直向前的保障 */ public List scan() { while (peek() != EOF) { if (peek() == '#') { if (scanDire()) { continue ; } if (scanSingleLineComment()) { continue ; } if (scanMultiLineComment()) { continue ; } if (scanNoParse()) { continue ; } } scanText(); } return tokens; } /** * 指令模式与解析规则 * 1:指令 pattern * #(p) * #id(p) * #define id(p) * #@id(p) / #@id?(p) * #else / #end * * 2:关键字类型指令在获取到关键字以后,必须要正确解析出后续内容,否则抛异常 * * 3:非关键字类型指令只有在本行内出现 # id ( 三个序列以后,才要求正确解析出后续内容 * 否则当成普通文本 */ boolean scanDire() { String id = null; StringBuilder para = null; Token idToken = null; Token paraToken = null; while (true) { switch (state) { case 0: if (peek() == '#') { // # next(); skipBlanks(); state = 1; continue ; } return fail(); case 1: if (peek() == '(') { // # ( para = scanPara(""); idToken = new Token(Symbol.OUTPUT, beginRow); paraToken = new ParaToken(para, beginRow); return addOutputToken(idToken, paraToken); } if (CharTable.isLetter(peek())) { // # id state = 10; continue ; } if (peek() == '@') { // # @ next(); skipBlanks(); if (CharTable.isLetter(peek())) { // # @ id state = 20; continue ; } } return fail(); // ----------------------------------------------------- case 10: // # id id = scanId(); Symbol symbol = Symbol.getKeywordSym(id); // 非关键字指令 if (symbol == null) { state = 11; continue ; } // define 指令 if (symbol == Symbol.DEFINE) { state = 12; continue ; } // 在支持 #seleif 的基础上,支持 #else if if (symbol == Symbol.ELSE) { if (foundFollowingIf()) { id = "else if"; symbol = Symbol.ELSEIF; } } // 无参关键字指令 if (symbol.noPara()) { return addNoParaToken(new Token(symbol, id, beginRow)); } // 有参关键字指令 skipBlanks(); if (peek() == '(') { para = scanPara(id); idToken = new Token(symbol, beginRow); paraToken = new ParaToken(para, beginRow); return addIdParaToken(idToken, paraToken); } throw new ParseException("#" + id + " directive requires parentheses \"()\"", new Location(fileName, beginRow)); case 11: // 用户自定义指令必须有参数 skipBlanks(); if (peek() == '(') { para = scanPara(id); idToken = new Token(Symbol.ID, id, beginRow); paraToken = new ParaToken(para, beginRow); return addIdParaToken(idToken, paraToken); } return fail(); // 用户自定义指令在没有左括号的情况下当作普通文本 case 12: // 处理 "# define id (para)" 指令 skipBlanks(); if (CharTable.isLetter(peek())) { id = scanId(); // 模板函数名称 skipBlanks(); if (peek() == '(') { para = scanPara("define " + id); idToken = new Token(Symbol.DEFINE, id, beginRow); paraToken = new ParaToken(para, beginRow); return addIdParaToken(idToken, paraToken); } throw new ParseException("#define " + id + " : template function definition requires parentheses \"()\"", new Location(fileName, beginRow)); } throw new ParseException("#define directive requires identifier as a function name", new Location(fileName, beginRow)); case 20: // # @ id id = scanId(); skipBlanks(); boolean hasQuestionMark = peek() == '?'; if (hasQuestionMark) { next(); skipBlanks(); } if (peek() == '(') { para = scanPara(hasQuestionMark ? "@" + id + "?" : "@" + id); idToken = new Token(hasQuestionMark ? Symbol.CALL_IF_DEFINED : Symbol.CALL, id, beginRow); paraToken = new ParaToken(para, beginRow); return addIdParaToken(idToken, paraToken); } return fail(); default : return fail(); } } } boolean foundFollowingIf() { int p = forward; while (CharTable.isBlank(buf[p])) {p++;} if (buf[p++] == 'i') { if (buf[p++] == 'f') { while (CharTable.isBlank(buf[p])) {p++;} // 要求出现 '(' 才认定解析成功,为了支持这种场景: #else if you ... if (buf[p] == '(') { forward = p; return true; } } } return false; } /** * 调用者已确定以字母或下划线开头,故一定可以获取到 id值 */ String scanId() { int idStart = forward; while (CharTable.isLetterOrDigit(next())) { ; } return subBuf(idStart, forward - 1).toString(); } /** * 扫描指令参数,成功则返回,否则抛出词法分析异常 */ StringBuilder scanPara(String id) { char quotes = '"'; int localState = 0; int parenDepth = 1; // 指令后面参数的第一个 '(' 深度为 1 next(); int paraStart = forward; while (true) { switch (localState) { case 0: for (char c=peek(); true; c=next()) { if (c == ')') { parenDepth--; if (parenDepth == 0) { // parenDepth 不可能小于0,因为初始值为 1 next(); return subBuf(paraStart, forward - 2); } continue ; } if (c == '(') { parenDepth++; continue ; } if (c == '"' || c == '\'') { quotes = c; localState = 1; break ; } if (CharTable.isExprChar(c)) { continue ; } if (c == EOF) { throw new ParseException("#" + id + " parameter can not match the end char ')'", new Location(fileName, beginRow)); } throw new ParseException("#" + id + " parameter exists illegal char: '" + c + "'", new Location(fileName, beginRow)); } break ; case 1: for (char c=next(); true; c=next()) { if (c == quotes) { if (buf[forward - 1] != '\\') { // 前一个字符不是转义字符 next(); localState = 0; break ; } else { continue ; } } if (c == EOF) { throw new ParseException("#" + id + " parameter error, the string parameter not ending", new Location(fileName, beginRow)); } } break ; } } } /** * 单行注释,开始状态 100,关注换行与 EOF */ boolean scanSingleLineComment() { while (true) { switch (state) { case 100: if (peek() == '#' && next() == '#' && next() == '#') { state = 101; continue ; } return fail(); case 101: for (char c=next(); true; c=next()) { if (c == '\n') { if (deletePreviousTextTokenBlankTails()) { return prepareNextScan(1); } else { return prepareNextScan(0); } } if (c == EOF) { deletePreviousTextTokenBlankTails(); return prepareNextScan(0); } } default : return fail(); } } } /** * 多行注释,开始状态 200,关注结尾标记与 EOF */ boolean scanMultiLineComment() { while (true) { switch (state) { case 200: if (peek() == '#' && next() == '-' && next() == '-') { state = 201; continue ; } return fail(); case 201: for (char c=next(); true; c=next()) { if (c == '-' && buf[forward + 1] == '-' && buf[forward + 2] == '#') { forward = forward + 3; if (lookForwardLineFeedAndEof() && deletePreviousTextTokenBlankTails()) { return prepareNextScan(peek() != EOF ? 1 : 0); } else { return prepareNextScan(0); } } if (c == EOF) { throw new ParseException("The multiline comment start block \"#--\" can not match the end block: \"--#\"", new Location(fileName, beginRow)); } } default : return fail(); } } } /** * 非解析块,开始状态 300,关注结尾标记与 EOF */ boolean scanNoParse() { while (true) { switch (state) { case 300: if (peek() == '#' && next() == '[' && next() == '[') { state = 301; continue ; } return fail(); case 301: for (char c=next(); true; c=next()) { if (c == ']' && buf[forward + 1] == ']' && buf[forward + 2] == '#') { addTextToken(subBuf(lexemeBegin + 3, forward - 1)); // NoParse 块使用 TextToken return prepareNextScan(3); } if (c == EOF) { throw new ParseException("The \"no parse\" start block \"#[[\" can not match the end block: \"]]#\"", new Location(fileName, beginRow)); } } default : return fail(); } } } boolean scanText() { for (char c=peek(); true; c=next()) { if (c == '#' || c == EOF) { addTextToken(subBuf(lexemeBegin, forward - 1)); return prepareNextScan(0); } } } boolean fail() { if (state < 300) { forward = lexemeBegin; forwardRow = beginRow; } if (state < 100) { state = 100; } else if (state < 200) { state = 200; } else if (state < 300) { state = 300; } else { state = TEXT_STATE_DIAGRAM; } return false; } char next() { if (buf[forward] == '\n') { forwardRow++; } return buf[++forward]; } char peek() { return buf[forward]; } void skipBlanks() { while (CharTable.isBlank(buf[forward])) { next(); } } /** * scanPara 与 scanNoParse 存在 start > end 的情况 */ StringBuilder subBuf(int start, int end) { if (start > end) { return null; } StringBuilder ret = new StringBuilder(end - start + 1); for (int i=start; i<=end; i++) { ret.append(buf[i]); } return ret; } boolean prepareNextScan(int moveForward) { for (int i=0; i