package com.walker.semantics.support; import com.walker.infrastructure.ApplicationRuntimeException; import com.walker.infrastructure.utils.StringUtils; import com.walker.semantics.InputWord; import com.walker.semantics.OwnerTextItem; import com.walker.semantics.OwnerTextStore; import com.walker.semantics.SemanticsManager; import com.walker.semantics.SpeechPart; import com.walker.semantics.TextSimilar; import com.walker.semantics.TextSimilarEngine; import com.walker.semantics.WordMeta; import com.walker.semantics.util.TextSimilarComparator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; public abstract class AbstractTextSimilarEngine implements TextSimilarEngine { protected final transient Logger logger = LoggerFactory.getLogger(getClass()); // 存储 key = 业务ID,value = 词库关键词Hash值(多个) // private final Map idAndWordHashCache = new HashMap<>(256); // key = 业务ID,value = 原始关键词内容,逗号分隔的,回显数据使用 // private final Map idAndTextCache = new HashMap<>(256); private final TextSimilarComparator comparator = new TextSimilarComparator(); // 2023-09-12,增加归属人,支持多个用户同时使用 private final Map ownerTextStoreMap = new HashMap<>(4); @Override public SemanticsManager getSemanticsManager() { return this.semanticsManager; } @Override public List search(String[] text, String owner) { if(StringUtils.isEmpty(owner)){ throw new IllegalArgumentException("owner必须输入"); } if(text == null || text.length == 0){ throw new IllegalArgumentException("text必须输入"); } OwnerTextStore ownerTextStore = this.ownerTextStoreMap.get(owner); if(ownerTextStore == null){ // throw new IllegalStateException("没有找到owner关键词配置数据,owner=" + owner); logger.warn("没有找到owner关键词配置数据,owner=" + owner); return null; } Integer[] inputHashCode = new Integer[text.length]; for(int i=0; i data = new ArrayList<>(8); TextSimilar textSimilar = null; // for(Map.Entry entry : this.idAndWordHashCache.entrySet()){ for(Map.Entry entry : ownerTextStore.getIdAndWordHashCache().entrySet()){ textSimilar = this.calculateOneSimilar(inputHashCode, entry.getKey(), entry.getValue(), ownerTextStore.getIdAndTextCache()); if(textSimilar != null){ data.add(textSimilar); // 如果存在完美匹配的,后面就不看了。2023-08-17 if(textSimilar.isPerfect()){ break; } } } // 对集合排序 Collections.sort(data, comparator); return data; } protected TextSimilar calculateOneSimilar(Integer[] wordsHash , String id, Integer[] userKeywordHash, Map idAndTextCache){ int wn = wordsHash.length; int rn = userKeywordHash.length; TextSimilar textSimilar = new TextSimilar(); textSimilar.setId(id); // textSimilar.setText(this.idAndTextCache.get(id)); textSimilar.setText(idAndTextCache.get(id)); int matchSize = 0; if(wn > rn){ // 输入多于词库 for(Integer userHashCode : userKeywordHash){ for(Integer inputHashCode : wordsHash){ if(userHashCode.intValue() == inputHashCode.intValue()){ matchSize ++; break; } } } if(matchSize == 0){ return null; } if(matchSize == rn){ // 输入的多,词库词条全命中,额外+2分 textSimilar.setDis(rn + 20); } else { // 输入的多,词库词条半命中,额外+1分 textSimilar.setDis(matchSize + 10); } } else if(wn == rn){ // 输入等于词库 for(Integer userHashCode : userKeywordHash){ for(Integer inputHashCode : wordsHash){ if(userHashCode.intValue() == inputHashCode.intValue()){ matchSize ++; break; } } } if(matchSize == 0){ return null; } if(matchSize == rn){ // 输入与词库相等,词库词条全命中,额外+2分 textSimilar.setDis(matchSize + 100); textSimilar.setPerfect(true); } else { // 输入与词库相等,词库词条半命中,额外+5分 textSimilar.setDis(matchSize + 20); } } else if(wn < rn) { // 输入少于词库 for(Integer inputHashCode : wordsHash){ for(Integer userHashCode : userKeywordHash){ if(inputHashCode.intValue() == userHashCode.intValue()){ matchSize ++; break; } } } if(matchSize == 0){ return null; } if(matchSize == wn){ // 输入与词库相等,输入全命中,额外+2分 textSimilar.setDis(matchSize + 80); } else { // 输入与词库相等,输入半命中,额外+5分 textSimilar.setDis(matchSize + 20); } } return textSimilar; } @Override public void loadLibrary() { // List data = this.doLoadUserLibrary(); // if(StringUtils.isEmptyList(data)){ // logger.warn("未加载到任何用户词库,TextSimilarEngine可能无法正常工作"); // return; // } // String id = null; // for(Variable v : data){ // id = v.getId(); // if(this.idAndWordHashCache.get(id) != null){ // throw new IllegalArgumentException("用户词库中已经存在该业务id = " + id); // } // this.idAndWordHashCache.put(id, SemanticsUtils.acquireWordHashCode(v.getStringValue())); // this.idAndTextCache.put(id, v.getStringValue()); // } List data = this.doLoadUserLibrary(); if(StringUtils.isEmptyList(data)){ logger.warn("未加载到任何用户词库,TextSimilarEngine可能无法正常工作"); return; } OwnerTextStore ownerTextStore = null; for(OwnerTextItem item : data){ ownerTextStore = this.ownerTextStoreMap.get(item.getOwner()); if(ownerTextStore == null){ ownerTextStore = new OwnerTextStore(item.getOwner()); this.ownerTextStoreMap.put(item.getOwner(), ownerTextStore); } if(ownerTextStore.getWordHashCode(item.getId()) != null){ throw new IllegalArgumentException("用户词库中已经存在该业务id = " + item.getId() + ", owner =" + item.getOwner()); } ownerTextStore.putWord(item.getId(), item.getText()); } } /** * 业务系统加载自己的词库数据。 *
     *     1) key = 业务ID
     *     2) value = 关键词,多个使用英文逗号分隔
     * 
* @return * @date 2023-08-17 */ // protected abstract List doLoadUserLibrary(); protected abstract List doLoadUserLibrary(); @Override public void registerKeyword(String[] words, SpeechPart speechPart) { this.checkSemanticsManager(); if(words == null || words.length == 0){ throw new IllegalArgumentException("请设置words"); } if(speechPart == null){ speechPart = SpeechPart.MY_N; } try { for(String word : words){ this.semanticsManager.registerKeyWord(0, word, speechPart); } } catch (Exception ex){ logger.error("向词库注册关键词,出现异常:" + ex.getMessage(), ex); throw new ApplicationRuntimeException("向词库注册关键词,出现异常:" + ex.getMessage(), ex); } } @Override public void removeKeyword(String keyword) { this.checkSemanticsManager(); this.semanticsManager.removeKeyWord(0, keyword); } @Override public List extract(String input) { if(StringUtils.isEmpty(input)){ throw new IllegalArgumentException("input必须输入"); } input = input.trim().toLowerCase(); InputWord inputWord = new InputWord(input); return inputWord.getWordMetaList(); } public void setSemanticsManager(SemanticsManager semanticsManager) { this.semanticsManager = semanticsManager; } private void checkSemanticsManager(){ if(this.semanticsManager == null){ throw new IllegalStateException("semanticsManager必须设置"); } } private SemanticsManager semanticsManager; }