From 65c1714039bfe31b748e10ca5fb7c0b78a4829e5 Mon Sep 17 00:00:00 2001 From: shikeying <pxzsky@163.com> Date: 星期二, 26 三月 2024 18:06:36 +0800 Subject: [PATCH] 更新milvus库,添加测试方法 --- walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java | 39 +++++++++++++ walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java | 32 +++++++++- walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java | 46 +++++++++++++++ walker-support-milvus/pom.xml | 2 walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java | 15 +++-- walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java | 3 walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java | 25 ++++--- 7 files changed, 138 insertions(+), 24 deletions(-) diff --git a/walker-support-milvus/pom.xml b/walker-support-milvus/pom.xml index 8894c32..0caf03e 100644 --- a/walker-support-milvus/pom.xml +++ b/walker-support-milvus/pom.xml @@ -14,7 +14,7 @@ <packaging>jar</packaging> <properties> - <milvus.version>2.1.0</milvus.version> + <milvus.version>2.3.4</milvus.version> </properties> <dependencies> diff --git a/walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java b/walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java new file mode 100644 index 0000000..17f2e78 --- /dev/null +++ b/walker-support-milvus/src/main/java/com/walker/support/milvus/MetricType.java @@ -0,0 +1,39 @@ +package com.walker.support.milvus; + +public enum MetricType { + + /** + * 适用于自然语言处理的距离计算,milvus中对应:'IP' + */ + NLP { + public String getIndex(){ + return INDEX_NLP; + } + }, + + /** + * 适用于机器视觉的距离计算,milvus中对应:'L2' + */ + IMAGE { + public String getIndex(){ + return INDEX_IMAGE; + } + }; + + public static final MetricType getType(String index){ + if(index.equals(INDEX_NLP)){ + return NLP; + } else if(index.equals(INDEX_IMAGE)){ + return IMAGE; + }else { + throw new 
IllegalArgumentException("涓嶆敮鎸佺殑璺濈绫诲瀷锛�" + index); + } + } + + public String getIndex(){ + throw new AbstractMethodError(); + } + + public static final String INDEX_NLP = "IP"; + public static final String INDEX_IMAGE = "L2"; +} diff --git a/walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java b/walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java index dd192cb..f487bcf 100644 --- a/walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java +++ b/walker-support-milvus/src/main/java/com/walker/support/milvus/OperateService.java @@ -36,7 +36,8 @@ * @param indexType 索引类型,参考:milvus索引类型字符串(IVF_FLAT/IVF_SQ8/IVF_PQ/HNSW/FLAT/ANNOY/等) * @param indexParam 索引参数,常用有:"{\"nlist\":1024}" */ - boolean createIndex(String tableName, String fieldName, String indexType, String indexParam); + boolean createIndex(String tableName, String fieldName, String indexType, String indexParam + , com.walker.support.milvus.MetricType myMetricType); /** * 删除已有的索引 diff --git a/walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java b/walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java index d32c455..0247b14 100644 --- a/walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java +++ b/walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java @@ -154,7 +154,8 @@ } @Override - public boolean createIndex(String tableName, String fieldName, String indexType, String indexParam){ + public boolean createIndex(String tableName, String fieldName, String indexType, String indexParam + , com.walker.support.milvus.MetricType myMetricType){ this.checkConnection(); IndexType INDEX_TYPE = null; if(indexType.equals("IVF_FLAT")){ @@ -165,22 +166,43 @@ INDEX_TYPE = IndexType.IVF_PQ; } else if(indexType.equals("HNSW")){ INDEX_TYPE = IndexType.HNSW; - } else 
if(indexType.equals("ANNOY")){ - INDEX_TYPE = IndexType.ANNOY; - } else if(indexType.equals("FLAT")){ + } + else if(indexType.equals("ANNOY")){ +// INDEX_TYPE = IndexType.ANNOY; + throw new UnsupportedOperationException("鏂扮増鏈凡涓嶆敮鎸侊細ANNOY"); + } + else if(indexType.equals("FLAT")){ INDEX_TYPE = IndexType.FLAT; + } else if(indexType.equals("GPU_IVF_FLAT")){ + INDEX_TYPE = IndexType.GPU_IVF_FLAT; + } else if(indexType.equals("GPU_IVF_PQ")){ + INDEX_TYPE = IndexType.GPU_IVF_PQ; + } else if(indexType.equals("SCANN")){ + INDEX_TYPE = IndexType.SCANN; } else { throw new IllegalArgumentException("鏆備笉鏀寔鍏朵粬绱㈠紩绫诲瀷锛�" + indexType); } + /** + * 鈥�**娆ф皬璺濈 (L2)**鈥�: 涓昏杩愮敤浜庤绠楁満瑙嗚棰嗗煙銆� + * 鈥�**鍐呯Н (IP)**鈥�: 涓昏杩愮敤浜庤嚜鐒惰瑷�澶勭悊锛圢LP锛夐鍩熴�� + * @date 2024-03-26 + */ CreateIndexParam.Builder builder = CreateIndexParam.newBuilder(); builder.withCollectionName(tableName) .withFieldName(fieldName) .withIndexName(fieldName + "_index") .withIndexType(INDEX_TYPE) - .withMetricType(MetricType.L2) +// .withMetricType(MetricType.L2) .withExtraParam(indexParam) .withSyncMode(false); + if(myMetricType == com.walker.support.milvus.MetricType.NLP){ + builder.withMetricType(MetricType.IP); + } else if(myMetricType == com.walker.support.milvus.MetricType.IMAGE){ + builder.withMetricType(MetricType.L2); + } else { + throw new UnsupportedOperationException("鏆傛椂涓嶆敮鎸佽窛绂荤被鍨嬶細" + myMetricType); + } R<RpcStatus> statusR = this.client.createIndex(builder.build()); return checkStatusR(statusR); diff --git a/walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java b/walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java index ffa3481..cf7bae4 100644 --- a/walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java +++ b/walker-tcp/src/main/java/com/walker/tcp/netty/WebSocketNettyHandler.java @@ -41,12 +41,15 @@ * 3) 寮曟搸浠呯洃鍚缃殑绔彛锛堥粯璁�60000锛夛紝閫氬父鍦� nginx 閰嶇疆涓紝浠g悊璇ョ鍙e嵆鍙紝濡傦細 * * ... 
- * location /wss { - * proxy_pass http://my.com:60000; - * proxy_http_version 1.1; - * proxy_set_header Upgrade $http_upgrade; - * proxy_set_header Connection "Upgrade"; - * } + * + * location /wss/ { + * proxy_pass http://localhost:60001/; + * proxy_http_version 1.1; + * proxy_set_header Upgrade $http_upgrade; + * proxy_set_header Connection "Upgrade"; + * # proxy_set_header X-real-ip $remote_addr; + * # proxy_set_header X-Forwarded-For $remote_addr; + * } * * </pre> * @param uri diff --git a/walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java b/walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java index 0154f51..bc075c1 100644 --- a/walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java +++ b/walker-text-semantics/src/main/java/com/walker/semantics/InputWord.java @@ -20,19 +20,19 @@ private String srcText; private List<WordMeta> wordList = new ArrayList<>(8); - + private int total = 0; // 总分词数量(包含重复的) - + // // 以下为分析语义使用的属性 // // 是否存在时间词,据此可决定是否解析时间 // private boolean hasWordTime = false; - + // 添加属性,用于根据词的名称查找元数据,2020-12-23 private Map<String, WordMeta> wordMetaCache = new HashMap<>(16); - + private int sceneContextId = 0; private String user; - + public InputWord(String srcWords){ if(SemanticsUtils.isEmpty(srcWords)){ throw new IllegalArgumentException("srcWords is required!"); @@ -46,6 +46,9 @@ sp = SpeechPart.toSpeechPart(t.getNatureStr()); if(sp.isFocus()){ this.addWordMeta(new WordMeta(t.getName(), sp)); + } else { +// System.out.println("不关注的词:" + t.getName()); +// this.addWordMeta(new WordMeta(t.getName(), sp)); } } } @@ -60,11 +63,11 @@ this.wordList.add(wm); this.total++; } - + public String getSrcText(){ return this.srcText; } - + public List<WordMeta> getWordMetaList(){ return this.wordList; } @@ -76,7 +79,7 @@ public int getTotal() { return total; } - + /** * 返回词元的map对象,key = 单词 * @return @@ -84,7 +87,7 @@ public Map<String, WordMeta> getWordMetaMap(){ return 
this.wordMetaCache; } - + /** * 杩斿洖璇彞鐨勯暱搴� * @return @@ -92,7 +95,7 @@ public int getTextLength(){ return this.srcText.length(); } - + /** * 鏍规嵁绱㈠紩杩斿洖鍗曡瘝璇嶅厓 * @param index @@ -104,7 +107,7 @@ } return this.wordList.get(index); } - + @Override public String toString(){ return this.wordList.toString(); diff --git a/walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java b/walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java new file mode 100644 index 0000000..5f5d8dc --- /dev/null +++ b/walker-text-semantics/src/test/java/com/walker/semantics/TestInputWord.java @@ -0,0 +1,46 @@ +package com.walker.semantics; + +import com.walker.infrastructure.utils.StringUtils; +import org.ansj.library.DicLibrary; +import org.junit.Test; + +import java.util.List; + +public class TestInputWord { + + @Test + public void printExtractDefaultKeywords(){ +// DicLibrary.insert(DicLibrary.DEFAULT, "鍦ㄨ亴", SpeechPart.INDEX_MY_V, 1000); +// DicLibrary.insert(DicLibrary.DEFAULT, "绉戠洰", SpeechPart.INDEX_MY_V, 1000); + +// String input = "浜哄憳淇℃伅琛ㄥ姞杞戒笉姝g‘锛屾垨鑰呮湁鏃跺�欏姞杞戒笉鍑轰俊鎭紝鎴栬�呮寜閽偣鍑绘棤鍙嶅簲"; +// String input = "浜哄憳琛ㄤ腑宸ヨ祫鍚堣鍒楅噾棰濅笉姝g‘"; +// String input = "瀵煎叆鏁版嵁鎻愮ず鍦ㄨ亴浜哄憳鏉ユ簮涓嶅彲缂栬緫"; +// String input = "瀵煎叆excel琛ㄦ牸鎻愮ず鑱屽姟(鑱岀О)鏁版嵁涓嶅悎娉�"; +// String input = "鍩虹鎬�,缁╂晥,宸ヨ祫,濉啓,闄愬埗锛屽鍔辨��,缁╂晥,宸ヨ祫,濉啓,闄愬埗"; +// String input = "鍏姟閫氳璐圭敤琛ヨ创鏈夊摢浜涘~鍐欓檺鍒�"; +// String input = "瀵煎叆鏁版嵁鎻愮ず鍦ㄨ亴浜哄憳鏉ユ簮涓嶅彲缂栬緫"; +// String input = "椤圭洰閫佸鍒颁富绠¢儴闂�/澶勫鍚庯紝涓荤閮ㄩ棬/涓荤澶勫鐪嬩笉鍒�"; +// String input = "椤圭洰宸茬粡缁堝浜嗭紝涓轰粈涔堣繕鏈敹鍒版寚鏍囷紝鏀朵笉鍒版寚鏍�"; +// String input = "鏈熷垵鏁板綍鍏ョ殑鏃跺�欐病鍔炴硶鑷畾涔変細璁$鐩殑鍊熸柟璐锋柟鍚�"; +// String input = "璧勪骇澶х被涓嶅啀鍖哄垎涓撶敤璁惧鍜岄�氱敤璁惧锛�2022骞村勾搴曡祫浜х浉鍏充綑棰濈粨杞嚦23骞寸殑鏈熷垵璇ュ浣曞鐞�"; + String input = "鍗曚綅琛屾斂绾у埆銆佹枃鏄庡崟浣嶇被鍨嬨�佸钩鏃惰�冩牳濂栨瘮渚嬨�佷簨涓氬崟浣嶇哗鏁堝伐璧勬爣鍑嗙瓑瑕佺礌鏄剧ず涓嶅"; + InputWord inputWord = new InputWord(input); + List<WordMeta> data = inputWord.getWordMetaList(); + for(WordMeta wm : data){ + System.out.println("key = " + wm.getText() + ", [" + wm.getSpeechPart() + "]"); + } + +// String test = 
"缁熷彂,宸ヨ祫,閭偍,閾惰,浠e彂|浠e彂琛�"; +// String[] arrays = StringUtils.toArray(test); +// for(String one : arrays){ +// System.out.println("word = " + one); +// if(one.indexOf("|") > 0){ +// String[] s = one.split("\\|"); +// for(String replace : s){ +// System.out.println("-----> r = " + replace); +// } +// } +// } + } +} -- Gitblit v1.9.1