package com.walker.semantics; import com.walker.semantics.util.SemanticsUtils; import org.ansj.domain.Result; import org.ansj.domain.Term; import org.ansj.splitWord.analysis.DicAnalysis; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * 描述:用户输入指令语句对象 * @author 时克英 * @date 2020年11月20日 上午9:22:23 */ public class InputWord { private String srcText; private List wordList = new ArrayList<>(8); private int total = 0; // 总分词数量(包含重复的) // // 以下为分析语义使用的属性 // // 是否存在时间词,据此可决定是否解析时间 // private boolean hasWordTime = false; // 添加属性,用于根据词的名称查找元数据,2020-12-23 private Map wordMetaCache = new HashMap<>(16); private int sceneContextId = 0; private String user; public InputWord(String srcWords){ if(SemanticsUtils.isEmpty(srcWords)){ throw new IllegalArgumentException("srcWords is required!"); } this.srcText = srcWords; // 分词填充 Result result = DicAnalysis.parse(this.srcText); SpeechPart sp = null; for(Term t : result.getTerms()){ sp = SpeechPart.toSpeechPart(t.getNatureStr()); if(sp.isFocus()){ this.addWordMeta(new WordMeta(t.getName(), sp)); } } } public void addWordMeta(WordMeta wm){ this.wordMetaCache.put(wm.getText(), wm); // 这里允许词重复,做标记 if(this.wordList.contains(wm)){ wm.setDuplication(true); } wm.setIndex(total); this.wordList.add(wm); this.total++; } public String getSrcText(){ return this.srcText; } public List getWordMetaList(){ return this.wordList; } /** * 共包含多少个词元(可能有重复的) * @return */ public int getTotal() { return total; } /** * 返回词元的map对象,key = 单词 * @return */ public Map getWordMetaMap(){ return this.wordMetaCache; } /** * 返回语句的长度 * @return */ public int getTextLength(){ return this.srcText.length(); } /** * 根据索引返回单词词元 * @param index * @return */ public WordMeta getWordMeta(int index){ if(index >= this.total){ throw new IllegalArgumentException("超过索引,无法返回WordMeta:" + index + ", total=" + this.total); } return this.wordList.get(index); } @Override public String toString(){ return this.wordList.toString(); } }