walker-support-milvus/pom.xml | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
walker-support-milvus/pom.xml
@@ -14,7 +14,7 @@ <packaging>jar</packaging> <properties> <milvus.version>2.1.0</milvus.version> <milvus.version>2.3.4</milvus.version> </properties> <dependencies> walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java
/**
 * Distance metric used when building a Milvus vector index.
 *
 * <p>Each constant maps to the metric identifier string exposed through
 * {@link #getIndex()}; {@link #getType(String)} performs the reverse lookup.
 */
public enum MetricType {

    /**
     * Distance calculation suited to natural language processing;
     * corresponds to {@code "IP"} in Milvus.
     */
    NLP {
        @Override
        public String getIndex() {
            return INDEX_NLP;
        }
    },

    /**
     * Distance calculation suited to machine vision;
     * corresponds to {@code "L2"} in Milvus.
     */
    IMAGE {
        @Override
        public String getIndex() {
            return INDEX_IMAGE;
        }
    };

    /** Metric identifier returned by {@link #NLP}. */
    public static final String INDEX_NLP = "IP";

    /** Metric identifier returned by {@link #IMAGE}. */
    public static final String INDEX_IMAGE = "L2";

    /**
     * Resolves a metric identifier to its enum constant.
     *
     * <p>The comparison is exact (case-sensitive). A {@code null} identifier is
     * rejected with the same exception as any other unknown value rather than
     * surfacing as a {@link NullPointerException}.
     *
     * @param index metric identifier, e.g. {@link #INDEX_NLP} or {@link #INDEX_IMAGE}
     * @return the constant whose {@link #getIndex()} equals {@code index}
     * @throws IllegalArgumentException if {@code index} is {@code null} or matches no constant
     */
    public static MetricType getType(String index) {
        // Iterating over values() keeps the lookup in sync when constants are added.
        for (MetricType candidate : values()) {
            // Receiver is the non-null constant string, so a null index is safe here.
            if (candidate.getIndex().equals(index)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("不支持的距离类型:" + index);
    }

    /**
     * Returns the metric identifier string for this constant.
     *
     * <p>Declared abstract so that a constant without an implementation is a
     * compile-time error instead of a runtime {@link AbstractMethodError}.
     *
     * @return the metric identifier, never {@code null}
     */
    public abstract String getIndex();
}
@@ -36,7 +36,8 @@ * @param indexType 索引类型,参考:milvus索引类型字符串(IVF_FLAT/IVF_SQ8/IVF_PQ/HNSW/FLAT/ANNOY/等) * @param indexParam 索引参数,常用有:"{\"nlist\":1024}" */ boolean createIndex(String tableName, String fieldName, String indexType, String indexParam); boolean createIndex(String tableName, String fieldName, String indexType, String indexParam , com.walker.support.milvus.MetricType myMetricType); /** * 删除已有的索引 walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java
@@ -154,7 +154,8 @@ } @Override public boolean createIndex(String tableName, String fieldName, String indexType, String indexParam){ public boolean createIndex(String tableName, String fieldName, String indexType, String indexParam , com.walker.support.milvus.MetricType myMetricType){ this.checkConnection(); IndexType INDEX_TYPE = null; if(indexType.equals("IVF_FLAT")){ @@ -165,22 +166,43 @@ INDEX_TYPE = IndexType.IVF_PQ; } else if(indexType.equals("HNSW")){ INDEX_TYPE = IndexType.HNSW; } else if(indexType.equals("ANNOY")){ INDEX_TYPE = IndexType.ANNOY; } else if(indexType.equals("FLAT")){ } else if(indexType.equals("ANNOY")){ // INDEX_TYPE = IndexType.ANNOY; throw new UnsupportedOperationException("新版本已不支持:ANNOY"); } else if(indexType.equals("FLAT")){ INDEX_TYPE = IndexType.FLAT; } else if(indexType.equals("GPU_IVF_FLAT")){ INDEX_TYPE = IndexType.GPU_IVF_FLAT; } else if(indexType.equals("GPU_IVF_PQ")){ INDEX_TYPE = IndexType.GPU_IVF_PQ; } else if(indexType.equals("SCANN")){ INDEX_TYPE = IndexType.SCANN; } else { throw new IllegalArgumentException("暂不支持其他索引类型:" + indexType); } /** * **欧氏距离 (L2)**: 主要运用于计算机视觉领域。 * **内积 (IP)**: 主要运用于自然语言处理(NLP)领域。 * @date 2024-03-26 */ CreateIndexParam.Builder builder = CreateIndexParam.newBuilder(); builder.withCollectionName(tableName) .withFieldName(fieldName) .withIndexName(fieldName + "_index") .withIndexType(INDEX_TYPE) .withMetricType(MetricType.L2) // .withMetricType(MetricType.L2) .withExtraParam(indexParam) .withSyncMode(false); if(myMetricType == com.walker.support.milvus.MetricType.NLP){ builder.withMetricType(MetricType.IP); } else if(myMetricType == com.walker.support.milvus.MetricType.IMAGE){ builder.withMetricType(MetricType.L2); } else { throw new UnsupportedOperationException("暂时不支持距离类型:" + myMetricType); } R<RpcStatus> statusR = this.client.createIndex(builder.build()); return checkStatusR(statusR); walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java
@@ -41,12 +41,15 @@ * 3) 引擎仅监听设置的端口(默认60000),通常在 nginx 配置中,代理该端口即可,如: * * ... * location /wss { * proxy_pass http://my.com:60000; * proxy_http_version 1.1; * proxy_set_header Upgrade $http_upgrade; * proxy_set_header Connection "Upgrade"; * } * * location /wss/ { * proxy_pass http://localhost:60001/; * proxy_http_version 1.1; * proxy_set_header Upgrade $http_upgrade; * proxy_set_header Connection "Upgrade"; * # proxy_set_header X-real-ip $remote_addr; * # proxy_set_header X-Forwarded-For $remote_addr; * } * * </pre> * @param uri walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java
@@ -20,19 +20,19 @@ private String srcText; private List<WordMeta> wordList = new ArrayList<>(8); private int total = 0; // 总分词数量(包含重复的) // // 以下为分析语义使用的属性 // // 是否存在时间词,据此可决定是否解析时间 // private boolean hasWordTime = false; // 添加属性,用于根据词的名称查找元数据,2020-12-23 private Map<String, WordMeta> wordMetaCache = new HashMap<>(16); private int sceneContextId = 0; private String user; public InputWord(String srcWords){ if(SemanticsUtils.isEmpty(srcWords)){ throw new IllegalArgumentException("srcWords is required!"); @@ -46,6 +46,9 @@ sp = SpeechPart.toSpeechPart(t.getNatureStr()); if(sp.isFocus()){ this.addWordMeta(new WordMeta(t.getName(), sp)); } else { // System.out.println("不关注的词:" + t.getName()); // this.addWordMeta(new WordMeta(t.getName(), sp)); } } } @@ -60,11 +63,11 @@ this.wordList.add(wm); this.total++; } public String getSrcText(){ return this.srcText; } public List<WordMeta> getWordMetaList(){ return this.wordList; } @@ -76,7 +79,7 @@ public int getTotal() { return total; } /** * 返回词元的map对象,key = 单词 * @return @@ -84,7 +87,7 @@ public Map<String, WordMeta> getWordMetaMap(){ return this.wordMetaCache; } /** * 返回语句的长度 * @return @@ -92,7 +95,7 @@ public int getTextLength(){ return this.srcText.length(); } /** * 根据索引返回单词词元 * @param index @@ -104,7 +107,7 @@ } return this.wordList.get(index); } @Override public String toString(){ return this.wordList.toString(); walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java
New file @@ -0,0 +1,46 @@ package com.walker.semantics; import com.walker.infrastructure.utils.StringUtils; import org.ansj.library.DicLibrary; import org.junit.Test; import java.util.List; public class TestInputWord { @Test public void printExtractDefaultKeywords(){ // DicLibrary.insert(DicLibrary.DEFAULT, "在职", SpeechPart.INDEX_MY_V, 1000); // DicLibrary.insert(DicLibrary.DEFAULT, "科目", SpeechPart.INDEX_MY_V, 1000); // String input = "人员信息表加载不正确,或者有时候加载不出信息,或者按钮点击无反应"; // String input = "人员表中工资合计列金额不正确"; // String input = "导入数据提示在职人员来源不可编辑"; // String input = "导入excel表格提示职务(职称)数据不合法"; // String input = "基础性,绩效,工资,填写,限制,奖励性,绩效,工资,填写,限制"; // String input = "公务通讯费用补贴有哪些填写限制"; // String input = "导入数据提示在职人员来源不可编辑"; // String input = "项目送审到主管部门/处室后,主管部门/主管处室看不到"; // String input = "项目已经终审了,为什么还未收到指标,收不到指标"; // String input = "期初数录入的时候没办法自定义会计科目的借方贷方吗"; // String input = "资产大类不再区分专用设备和通用设备,2022年年底资产相关余额结转至23年的期初该如何处理"; String input = "单位行政级别、文明单位类型、平时考核奖比例、事业单位绩效工资标准等要素显示不对"; InputWord inputWord = new InputWord(input); List<WordMeta> data = inputWord.getWordMetaList(); for(WordMeta wm : data){ System.out.println("key = " + wm.getText() + ", [" + wm.getSpeechPart() + "]"); } // String test = "统发,工资,邮储,银行,代发|代发行"; // String[] arrays = StringUtils.toArray(test); // for(String one : arrays){ // System.out.println("word = " + one); // if(one.indexOf("|") > 0){ // String[] s = one.split("\\|"); // for(String replace : s){ // System.out.println("-----> r = " + replace); // } // } // } } }