package com.walker.infrastructure.utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* 语句分段读取器,它能根据关键词分段读出中间的内容。
* 你可以设置多个分段关键词让分析器来处理,例如:
*
* I am a boy, but my sister is a good girl!
* 你可以设置关键词对来解析内容:[I,boy],[but ,girl!]
* 经过这两个关键词分析后,会返回以下结果:
* [ am a],[ my sister is a good ]
*
* @author shikeying
* @date 2013-7-17
*
*/
public class SegmentReader {
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* 定义组件的选项开关
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
/* 关键词是否大小写敏感,默认否 */
private boolean keyCaseSensitive = false;
public boolean isKeyCaseSensitive() {
return keyCaseSensitive;
}
public void setKeyCaseSensitive(boolean keyCaseSensitive) {
this.keyCaseSensitive = keyCaseSensitive;
}
/* 返回结果中是否包含关键词,默认不包含 */
private boolean contentIncludeKey = false;
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* 定义组件内部私有变量
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
public boolean isContentIncludeKey() {
return contentIncludeKey;
}
public void setContentIncludeKey(boolean contentIncludeKey) {
this.contentIncludeKey = contentIncludeKey;
}
private Logger logger = LoggerFactory.getLogger(this.getClass());
/* 最终分析过后的字符串内容 */
private final StringBuilder totalResult = new StringBuilder(512);
/* 解析后的最终结果,可以通过关键词对来查找对应结果 */
// private final Map result = new HashMap(2);
private final Map> result2 = new HashMap>(2);
/* 用户添加的所有关键词信息 */
private final List keys = new ArrayList(2);
/* 开始关键词与结束关键词对应,便于快速查找 */
private final Map startEndKeys = new HashMap(2);
private int maxStartKeyLength, maxEndKeyLength = 0;
/* 当前已经扫描过入栈字符 */
private LinkedList scanStack = new LinkedList();
/* 已入栈的关键词 */
private LinkedList existKeysStack = new LinkedList();
/* 关键词排除的字符集合,即有些关键词不能出现在特定的字符中 */
/* 例如SQL语句中关键词分号不能在''中,有些字符串中也会包含关键词 */
/* 这些被排除的字符都是成对出现的。 */
public static final Set keyExcludedCharSet = new HashSet();
static {
keyExcludedCharSet.add('\'');
}
private LinkedList keyExcludedCharStack = new LinkedList();
/** ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* 组件暴露方法调用
*
** ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
/**
* 读取给定的语句,组件准备分析
* @param sentence
* @return 返回分析过的最终完整的语句内容
*/
public String read(String sentence){
assert (sentence != null && !sentence.equals(""));
if(keys.size() == 0)
throw new IllegalStateException("keys is required.");
getMaxKeyLength();
for(KeyExpression ke : keys){
// result.put(ke, new StringBuilder(512));
result2.put(ke, new ArrayList());
startEndKeys.put(ke.getStartKey(), ke.getEndKey());
}
for(char c : (keyCaseSensitive ? sentence.toCharArray() : sentence.toLowerCase().toCharArray())){
readOneCharacter(c, false);
}
/**
* 修复bug(bug20131202-1):如果把语句整体转换成小写,可能改变原始内容,例如:SQL语句中ID字段
* 会全部为小写,这样就影响了业务对数据的使用。所以不能转换小写。
*/
// for(char c : sentence.toCharArray()){
// readOneCharacter(c, false);
// }
/* 处理最后一个读取字符问题 */
readOneCharacter(scanStack.peekLast(), true);
return totalResult.toString();
}
/**
* 返回关键词对应的处理结果,
* @param startKey 开始关键词
* @param endKey 结束关键词
* @return
*/
public String getSolvedContent(String startKey, String endKey){
KeyExpression ke = new KeyExpression(startKey, endKey);
// StringBuilder sb = this.result.get(ke);
List sb = this.result2.get(ke);
return sb == null ? null : sb.size() == 0 ? null : sb.get(0).toString();
}
/**
* 根据关键词,返回该关键词包含的分析结果列表。
* 输入的关键词不能嵌套,是平行关系。如下示例:
*
* select ... from ... (select * from ...).
* select是开始,from是结束关键词
*
* @param startKey
* @param endKey
* @return
*/
public List getSolvedList(String startKey, String endKey){
return this.result2.get(new KeyExpression(startKey, endKey));
}
/**
* 关键词分析后的结果处理模式: 存储
*/
public static final int RESULT_MODE_STORE = 1;
/**
* 关键词分析后的结果处理模式: 删除
*/
public static final int RESULT_MODE_REMOVE = 2;
/**
* 关键词分析后的结果处理模式: 替换内容
*/
public static final int RESULT_MODE_REPLACE = 3;
/**
* 关键词分析后的结果处理模式: 通过回调接口来处理
*/
public static final int RESULT_MODE_CALLBACK = 4;
/**
* 添加默认的关键词,系统默认会返回解析的结果
* @param startKey 开始关键词
* @param endKey 结束关键词,可以没有
*/
public void addKey(String startKey, String endKey){
addKey(startKey, endKey, RESULT_MODE_STORE);
}
/**
* 添加关键词,删除行为。即:关键词之间的内容被删除
* @param startKey
* @param endKey
*/
public void addRemoveKey(String startKey, String endKey){
addKey(startKey, endKey, RESULT_MODE_REMOVE);
}
/**
* 添加关键词,替换行为。即:关键词之间内容会被替换
* @param startKey
* @param endKey
* @param replace 要替换的内容
*/
public void addReplaceKey(String startKey, String endKey, String replace){
addKey(startKey, endKey, RESULT_MODE_REPLACE, replace);
}
/**
* 添加关键词,回调行为。即:关键词之间内容会被回调接口继续调用,最终结果仍会返回
* @param startKey
* @param endKey
* @param callback 用户自定义实现的回调实现
*/
public void addCallbackKey(String startKey, String endKey, CallBack callback){
addKey(startKey, endKey, RESULT_MODE_CALLBACK, callback);
}
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* 组件内部私有方法
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
private void addKey(String startKey, String endKey, int mode, Object ...others){
assert (startKey != null && !startKey.equals(""));
if(!keyCaseSensitive){
startKey = startKey.toLowerCase();
endKey = (endKey == null ? null : endKey.toLowerCase());
}
for(KeyExpression ke : keys){
if(ke.getStartKey().equals(startKey))
throw new IllegalArgumentException("设置的开始关键字重复, key = " + startKey);
}
KeyExpression ke = new KeyExpression(startKey, endKey);
if(mode == RESULT_MODE_REPLACE || mode == RESULT_MODE_CALLBACK){
if(others == null || others.length > 1)
throw new IllegalArgumentException("argument is required in mode 'REPLACE' and 'CALLBACK'.");
if(mode == RESULT_MODE_REPLACE){
if(!(others[0] instanceof String))
throw new IllegalArgumentException("argument of replace must be String.");
else
ke.setResultMode(RESULT_MODE_REPLACE)
.setReplace(others[0].toString());
}
if(mode == RESULT_MODE_CALLBACK){
if(!(others[0] instanceof CallBack))
throw new IllegalArgumentException("argument of callback must be CallBack.");
else
ke.setResultMode(RESULT_MODE_CALLBACK)
.setCallBack((CallBack)others[0]);
}
} else if(mode == RESULT_MODE_REMOVE){
ke.setResultMode(RESULT_MODE_REMOVE);
}
keys.add(ke);
}
private void getMaxKeyLength(){
int skSize = 0;
int ekSize = 0;
for(KeyExpression ke : keys){
int k1 = ke.getStartKey().length();
if(k1 > skSize){
skSize = k1;
}
if(ke.getEndKey() != null && ke.getEndKey().length() > ekSize){
ekSize = ke.getEndKey().length();
}
}
this.maxStartKeyLength = skSize;
this.maxEndKeyLength = ekSize;
logger.debug("maxStartKeyLength = " + maxStartKeyLength);
logger.debug("maxEndKeyLength = " + maxEndKeyLength);
}
private char previousRead;
private void readOneCharacter(char c, boolean isLastRead){
/* 读到空格,而且上一个也是空格,忽略 */
if(c == 32 && previousRead == 32) return;
checkKeyExcludedChar(c);
// String scaned = getScanedChars().trim();
String scaned = getScanedChars();
// System.out.println("已扫描内容 = " + scaned);
doMatchedInScaned(scaned);
if(!isLastRead){
scanStack.addLast(c);
previousRead = c;
} else {
// 读到最后一个字符了,如果栈中存在内容就输出
this.totalResult.append(getScanedChars());
}
}
/**
* 栈中是否存在被排除的字符,如果存在返回 true
* @return
*/
private boolean inExcludedCharStack(){
return !keyExcludedCharStack.isEmpty();
}
/**
* 检查输入的字符是否是排除的字符
* @param c
*/
private void checkKeyExcludedChar(char c){
if(keyExcludedCharSet.contains(c)){
Character existInStack = this.keyExcludedCharStack.peekLast();
if(existInStack != null && c == existInStack){
// logger.debug("读入的字符在排除字符栈中已经存在,配对成功, 可以清除了. char = " + c);
keyExcludedCharStack.pollLast();
} else {
// logger.debug("读入的字符是要排除的字符,但栈顶并没有匹配的,直接加入栈顶");
keyExcludedCharStack.offerLast(c);
}
}
}
/**
* 在已经扫描的字符串中,是否存在匹配的关键词
* @return
*/
private boolean doMatchedInScaned(String scaned){
String findStartKey = doMatchStartKey(scaned);
if(findStartKey != null){
/* 在清空扫描栈之前,需要保存到关键词对应变量中 */
String savedData = null;
String savedKey = null;
if(!scaned.equals(findStartKey)) {
// savedData = this.contentIncludeKey ? scaned : scaned.replaceAll(findStartKey, "");
//2017-06-05 时克英修改,对于单符号关键词,必须用转义表示正则表达式
if(findStartKey.equals("{") || findStartKey.equals("[")){
savedData = this.contentIncludeKey ? scaned : scaned.replaceAll("\\\\" + findStartKey, "");
} else {
savedData = this.contentIncludeKey ? scaned : scaned.replaceAll(findStartKey, "");
}
}
savedKey = existKeysStack.peekLast();
if(savedKey != null){
int index = storedKeyCounter.get(savedKey);
List sbs = result2.get(new KeyExpression(savedKey
, startEndKeys.get(savedKey)));
sbs.get(index).append(savedData == null ? "" : savedData);
// result.get(new KeyExpression(savedKey
// , startEndKeys.get(savedKey))).append(savedData == null ? "" : savedData);
}
existKeysStack.offerLast(findStartKey);
scanStack.clear();
return true;
}
String existStartKey = existKeysStack.peekLast();
String endKey = existStartKey == null ? null : startEndKeys.get(existStartKey);
String matchedEndKey = doMatchEndKey(scaned, existStartKey, endKey);
if(matchedEndKey != null){
existKeysStack.pollLast();
scanStack.clear();
return true;
}
return false;
}
private String doMatchEndKey(String scaned, String startKey, String endKey){
int _ss = scaned.length();
int size = 0;
String cbCall = null;
for(KeyExpression ke : keys){
if(ke.getEndKey() == null) continue;
if(!ke.getStartKey().equalsIgnoreCase(startKey)) continue; // 必须是与开始匹配的结束,否则不比较
size = ke.getEndKey().length();
// if(_ss >= size && scaned.indexOf(ke.getEndKey()) >= 0){
if(_ss >= size && scaned.endsWith(ke.getEndKey()) && !inExcludedCharStack()){
if(ke.getResultMode() == RESULT_MODE_STORE){
writeResult2(scaned, ke);
} else if(ke.getResultMode() == RESULT_MODE_REMOVE){
if(this.contentIncludeKey)
totalResult.append(ke.getEndKey());
} else if(ke.getResultMode() == RESULT_MODE_REPLACE){
writeResult2(ke.getReplace(), ke);
} else if(ke.getResultMode() == RESULT_MODE_CALLBACK){
cbCall = ke.getCallBack().afterSegment(scaned, ke.getStartKey(), ke.getEndKey());
writeResult2(cbCall, ke);
}
return ke.getEndKey();
}
}
return null;
}
private Map storedKeyCounter = new HashMap(2);
private String doMatchStartKey(String scaned){
int _ss = scaned.length();
int size = 0;
String startKey = null;
String previousKey = null; // 栈中保存的开始关键词
for(KeyExpression ke : keys){
startKey = ke.getStartKey();
size = ke.getStartKey().length();
if(_ss >= size && scaned.endsWith(startKey)){
/* 如果后续字符串中包含了开始关键词,我们并不处理,因为主要通过单词来区分而不是字符 */
previousKey = existKeysStack.peekLast();
if(previousKey != null && previousKey.equals(startKey))
continue;
/* 删除关键词优先处理,如果发现栈里面已经存在关键词(开始) */
/* 并且是一个删除类型,那么后面嵌套的关键词就不再处理。 */
if(hasRemovedKeyInStack()) continue;
/* 遇到内容中存在多处相同的关键词时,我们需要同时保留这些分析结果 */
if(ke.getResultMode() != RESULT_MODE_REMOVE){
Integer i = storedKeyCounter.get(startKey);
if(i == null){
i = 0;
storedKeyCounter.put(startKey, i);
logger.debug("已存储的key '" + startKey + "' i=0.");
} else {
storedKeyCounter.put(startKey, ++i);
logger.debug("已存储的key '" + startKey + "' i=" + i);
}
List sbs = result2.get(new KeyExpression(startKey
, startEndKeys.get(startKey)));
sbs.add(i, new StringBuilder());
logger.debug("key '" + startKey + "' 创建了第" + i + "个StringBuilder.");
}
/*--------------------------------------------------*/
if(this.contentIncludeKey){
totalResult.append(scaned);
} else {
// totalResult.append(scaned.replaceAll(ke.getStartKey(), ""));
if(scaned.endsWith(ke.getStartKey())){
int indx = scaned.length() - ke.getStartKey().length();
totalResult.append(scaned.substring(0, indx));
} else
totalResult.append(scaned);
}
return ke.getStartKey();
}
}
return null;
}
private void writeResult2(String scaned, KeyExpression ke){
writeResult2(scaned, ke, true);
}
private void writeResult2(String scaned, KeyExpression ke, boolean includeTotalResult){
int index = storedKeyCounter.get(ke.getStartKey());
if(this.contentIncludeKey){
result2.get(ke).get(index).append(scaned);
if(includeTotalResult)
totalResult.append(scaned);
} else {
// result2.get(ke).get(index).append(scaned.replaceAll(, ""));
// totalResult.append(scaned.replaceAll(ke.getEndKey(), ""));
int eIndx = scaned.length() - ke.getEndKey().length();
result2.get(ke).get(index).append(scaned.substring(0, eIndx));
if(includeTotalResult)
totalResult.append(scaned.substring(0, eIndx));
}
}
/**
* 在关键词栈中已经存在了"删除类型的关键词"
* @return 如果存在返回true
*/
private boolean hasRemovedKeyInStack(){
String _existKey = existKeysStack.peekLast();
if(_existKey == null) return false;
KeyExpression _k = new KeyExpression(_existKey, startEndKeys.get(_existKey));
for(KeyExpression ke : keys){
if(ke.equals(_k) && ke.getResultMode() == RESULT_MODE_REMOVE){
return true;
}
}
return false;
}
private String getScanedChars(){
StringBuilder sb = new StringBuilder(8);
for(char c : scanStack){
sb.append(c);
}
return sb.toString();
}
/**
* 定义关键词表达式对象,内部用来处理关键词的各种属性和逻辑
* 这是封装的一个好处,系统通过此对象来完成对关键词的各种操作。
* @author shikeying
*
*/
private class KeyExpression {
private String startKey;
private String endKey;
private int resultMode = RESULT_MODE_STORE;
private CallBack callBack;
private String replace;
public KeyExpression(String startKey, String endKey){
assert (startKey != null && !startKey.equals(""));
if(!keyCaseSensitive){
this.startKey = startKey.toLowerCase();
this.endKey = (endKey == null ? null : endKey.toLowerCase());
} else {
this.startKey = startKey;
this.endKey = endKey;
}
}
public String getStartKey() {
return startKey;
}
public String getEndKey() {
return endKey;
}
public String getReplace() {
return replace;
}
public CallBack getCallBack() {
return callBack;
}
public int getResultMode() {
return resultMode;
}
public KeyExpression setResultMode(int mode){
this.resultMode = mode;
return this;
}
public KeyExpression setReplace(String replace){
this.replace = replace;
return this;
}
public KeyExpression setCallBack(CallBack callback){
this.callBack = callback;
return this;
}
public int hashCode(){
return (31 + 13*this.startKey.hashCode()
+ (this.endKey == null ? 0 : this.endKey.hashCode()*13));
}
public boolean equals(Object o){
if(o == null) return false;
if(o instanceof KeyExpression){
KeyExpression ke = (KeyExpression)o;
if(ke == this) return true;
if(ke.startKey.equals(this.startKey)){
return (ke.endKey == null ?
(this.endKey == null ? true : false)
: (this.endKey != null && this.endKey.equals(ke.endKey) ? true : false));
}
}
return false;
}
public String toString(){
return new StringBuilder().append("{skey=").append(startKey)
.append(", ekey=").append(endKey)
.append(", mode=").append(resultMode)
.append(", replace=").append(replace)
.append(", callback=").append(callBack==null ? "" : callBack.getClass().getName())
.append("}").toString();
}
}
/**
* 分段语句处理回调函数,用户可以自定义实现系统分析后的结果
* @author shikeying
*
*/
protected interface CallBack{
String afterSegment(String segmentResult, String keyStart, String keyEnd);
}
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* test method
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
void print(){
System.out.println("previousRead = " + (int)previousRead);
System.out.println("scanStack = " + scanStack.toArray());
for(char c : scanStack)
System.out.print(c);
}
public static void main(String[] args){
// SegmentReader sr = new SegmentReader();
// sr.addKey("boy", "girl");
// sr.addKey("must ", null);
// String result = sr.read("I am a boy, but my sister is a good girl.");
// sr.print();
// System.out.println("========= result ======== \t");
// System.out.println(result);
// System.out.println("========= getKeys(boy,girl) ======== \t");
// System.out.println(sr.getSolvedContent("boy", "girl"));
StringBuilder test = new StringBuilder();
test.append("OPEN out_cursor FOR\r\n");
test.append("--查询电子钱包消费和月票消费\r\n");
test.append("SELECT cardtype,\r\n");
test.append("NVL (SUM (operno), 0) AS operno,\r\n");
test.append("NVL (SUM (viceopermn), 0) AS viceopermn\r\n");
test.append("to_char('sum',10,90) end");
test.append("FROM ( --电子钱包和月票钱包\r\n");
test.append("select * FROM TABLE t WHERE opdt >= TO_DATE(prmsdate, 'YYYY-MM-DD HH24:MI;SS'));");
test.append("ELSE \r\n");
test.append("OPEN out_cursor FOR querysql;\r\n");
System.out.println(test);
System.out.println("--------- start... ---------");
SegmentReader sr2 = new SegmentReader();
sr2.setKeyCaseSensitive(true);
sr2.addKey("OPEN out_cursor FOR", ";");
sr2.addRemoveKey("--", "\r\n");
sr2.addKey("to_char", "end");
System.out.println(sr2.read(test.toString()));
System.out.println("------------------");
System.out.println(sr2.getSolvedList("OPEN out_cursor FOR", ";"));
System.out.println("##########");
System.out.println(sr2.getSolvedContent("to_char", "end"));
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
String srcSql = "select * from s_user_core where id = {uid}";
SegmentReader sr = new SegmentReader();
// sr.setKeyCaseSensitive(false);
sr.addKey("{", "}");
System.out.println(sr.read(srcSql));
List paramList = sr.getSolvedList("{", "}");
System.out.println(paramList);
}
}