walker-support-milvus/pom.xml
@@ -18,6 +18,13 @@ </properties> <dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <scope>test</scope> </dependency> <dependency> <groupId>io.milvus</groupId> <artifactId>milvus-sdk-java</artifactId> walker-support-milvus/src/main/java/com/walker/support/milvus/DataType.java
@@ -21,9 +21,24 @@ return Int64_VALUE; } }, /** * 废弃了该字符串类型,使用:VarChar。 * @date 2024-03-28 */ @Deprecated String{ public int getIndex(){ return String_VALUE; } }, /** * 新版使用的字符串类型 * @date 2024-03-28 */ VarChar { public int getIndex(){ return VarChar_VALUE; } }, FloatVector{ @@ -31,6 +46,12 @@ return FloatVector_VALUE; } }, Float16Vector { public int getIndex(){ return Float16Vector_VALUE; } }, BinaryVector{ public int getIndex(){ return BinaryVector_VALUE; @@ -39,6 +60,18 @@ None{ public int getIndex(){ return None_VALUE; } }, Array { public int getIndex(){ return Array_VALUE; } }, Json { public int getIndex(){ return JSON_VALUE; } }; @@ -63,7 +96,15 @@ return BinaryVector; } else if(index == None_VALUE){ return None; } else { } else if(index == VarChar_VALUE){ return VarChar; } else if(index == Array_VALUE){ return Array; } else if(index == JSON_VALUE){ return Json; } else if(index == Float16Vector_VALUE){ return Float16Vector; } else { throw new IllegalArgumentException("不支持的数据格式:" + index); } } @@ -77,7 +118,10 @@ public static final int Float_VALUE = 10; public static final int Double_VALUE = 11; public static final int String_VALUE = 20; // public static final int VarChar_VALUE = 21; public static final int VarChar_VALUE = 21; public static final int Array_VALUE = 22; public static final int JSON_VALUE = 23; public static final int BinaryVector_VALUE = 100; public static final int FloatVector_VALUE = 101; public static final int Float16Vector_VALUE = 102; } walker-support-milvus/src/main/java/com/walker/support/milvus/FieldType.java
@@ -2,7 +2,8 @@ //import io.milvus.param.ParamUtils; import org.apache.commons.lang3.StringUtils; import io.milvus.param.ParamUtils; import org.checkerframework.checker.nullness.qual.NonNull; import java.util.HashMap; import java.util.Map; @@ -15,6 +16,9 @@ private String description; private DataType dataType; private Map<String, String> typeParams; // 2024-03-28 private DataType elementType; private boolean partitionKey; private FieldType(FieldType.Builder builder) { if (builder == null) { @@ -26,6 +30,8 @@ this.dataType = builder.dataType; this.typeParams = builder.typeParams; // this.autoID = builder.autoID; this.elementType = builder.elementType; this.partitionKey = builder.partitionKey; } } @@ -65,12 +71,16 @@ private DataType dataType; private final Map<String, String> typeParams; // private boolean autoID; private DataType elementType; private boolean partitionKey; private Builder() { this.primaryKey = false; this.description = ""; this.typeParams = new HashMap(); // this.autoID = false; this.elementType = DataType.None; this.partitionKey = false; } public Builder withName(String name) { @@ -101,6 +111,15 @@ throw new NullPointerException("dataType is marked non-null but is null"); } else { this.dataType = dataType; return this; } } public Builder withElementType(@NonNull DataType elementType) { if (elementType == null) { throw new NullPointerException("elementType is marked non-null but is null"); } else { this.elementType = elementType; return this; } } @@ -148,52 +167,109 @@ // this.autoID = autoID; // return this; // } public Builder withPartitionKey(boolean partitionKey) { this.partitionKey = partitionKey; return this; } public FieldType build() throws ParamException { // ParamUtils.CheckNullEmptyString(this.name, "Field name"); if (this.name == null || StringUtils.isBlank(this.name)) { throw new ParamException(name + " cannot be null or empty"); } ParamUtils.CheckNullEmptyString(this.name, "Field name"); if (this.dataType != null && this.dataType != 
DataType.None) { if (this.dataType == DataType.String) { throw new ParamException("String type is not supported, use VarChar instead"); throw new io.milvus.exception.ParamException("String type is not supported, use Varchar instead"); } else { int len; if (this.dataType == DataType.FloatVector || this.dataType == DataType.BinaryVector) { if (!this.typeParams.containsKey("dim")) { throw new ParamException("Vector field dimension must be specified"); throw new io.milvus.exception.ParamException("Vector field dimension must be specified"); } try { len = Integer.parseInt((String)this.typeParams.get("dim")); if (len <= 0) { throw new ParamException("Vector field dimension must be larger than zero"); throw new io.milvus.exception.ParamException("Vector field dimension must be larger than zero"); } } catch (NumberFormatException var3) { throw new ParamException("Vector field dimension must be an integer number"); throw new io.milvus.exception.ParamException("Vector field dimension must be an integer number"); } } // if (this.dataType == io.milvus.grpc.DataType.VarChar) { // if (!this.typeParams.containsKey("max_length")) { // throw new ParamException("Varchar field max length must be specified"); // } // // try { // len = Integer.parseInt((String)this.typeParams.get("max_length")); // if (len <= 0) { // throw new ParamException("Varchar field max length must be larger than zero"); // } // } catch (NumberFormatException var2) { // throw new ParamException("Varchar field max length must be an integer number"); // } // } if (this.dataType == DataType.VarChar) { if (!this.typeParams.containsKey("max_length")) { throw new io.milvus.exception.ParamException("Varchar field max length must be specified"); } try { len = Integer.parseInt((String)this.typeParams.get("max_length")); if (len <= 0) { throw new io.milvus.exception.ParamException("Varchar field max length must be larger than zero"); } } catch (NumberFormatException var2) { throw new 
io.milvus.exception.ParamException("Varchar field max length must be an integer number"); } } if (this.partitionKey) { if (this.primaryKey) { throw new io.milvus.exception.ParamException("Primary key field can not be partition key"); } if (this.dataType != DataType.Long && this.dataType != DataType.VarChar) { throw new io.milvus.exception.ParamException("Only Int64 and Varchar type field can be partition key"); } } if (this.dataType == DataType.Array) { if (this.elementType == DataType.String) { throw new io.milvus.exception.ParamException("String type is not supported, use Varchar instead"); } if (this.elementType == DataType.None || this.elementType == DataType.Array || this.elementType == DataType.Json || this.elementType == DataType.String || this.elementType == DataType.FloatVector || this.elementType == DataType.Float16Vector || this.elementType == DataType.BinaryVector) { throw new io.milvus.exception.ParamException("Unsupported element type"); } if (!this.typeParams.containsKey("max_capacity")) { throw new io.milvus.exception.ParamException("Array field max capacity must be specified"); } if (this.elementType == DataType.VarChar && !this.typeParams.containsKey("max_length")) { throw new io.milvus.exception.ParamException("Varchar array field max length must be specified"); } } return new FieldType(this); } } else { throw new ParamException("Field data type is illegal"); throw new io.milvus.exception.ParamException("Field data type is illegal"); } // if (this.name == null || StringUtils.isBlank(this.name)) { // throw new ParamException(name + " cannot be null or empty"); // } // if (this.dataType != null && this.dataType != DataType.None) { // if (this.dataType == DataType.String) { // throw new ParamException("String type is not supported, use VarChar instead"); // } else { // int len; // if (this.dataType == DataType.FloatVector || this.dataType == DataType.BinaryVector) { // if (!this.typeParams.containsKey("dim")) { // throw new ParamException("Vector 
field dimension must be specified"); // } // // try { // len = Integer.parseInt((String)this.typeParams.get("dim")); // if (len <= 0) { // throw new ParamException("Vector field dimension must be larger than zero"); // } // } catch (NumberFormatException var3) { // throw new ParamException("Vector field dimension must be an integer number"); // } // } // return new FieldType(this); // } // } else { // throw new ParamException("Field data type is illegal"); // } } } } walker-support-milvus/src/main/java/com/walker/support/milvus/OutData.java
@@ -23,7 +23,7 @@ private List<Long> keyList = new ArrayList<>(); private List<Long> businessIdList = new ArrayList<>(); private List<String> businessIdList = new ArrayList<>(); /** * 根据milvus数据库主键返回记录对应的预测分值。 @@ -96,11 +96,11 @@ * 返回业务编号集合。 * @return */ public List<Long> getBusinessIdList() { public List<String> getBusinessIdList() { return businessIdList; } public void setBusinessIdList(List<Long> businessIdList) { public void setBusinessIdList(List<String> businessIdList) { this.businessIdList = businessIdList; } @@ -111,7 +111,7 @@ public static class Data implements Serializable { private long key; private long businessId; private String businessId; private float score = 0; @@ -119,7 +119,7 @@ return key; } public long getBusinessId() { public String getBusinessId() { return businessId; } @@ -127,7 +127,7 @@ return score; } public Data(long key, long businessId, float score){ public Data(long key, String businessId, float score){ this.key = key; this.businessId = businessId; this.score = score; walker-support-milvus/src/main/java/com/walker/support/milvus/engine/DefaultOperateService.java
@@ -147,7 +147,7 @@ return false; } if(statusR.getStatus().intValue() == R.Status.Success.getCode()){ logger.error("insert 返回值:" + statusR.getStatus().intValue()); logger.debug("insert 返回值:" + statusR.getStatus().intValue()); return true; } return false; @@ -254,6 +254,12 @@ MetricType metricType = null; if(query.getMetricType() == null || query.getMetricType().equals("")){ metricType = MetricType.L2; } else if(query.getMetricType().equals(com.walker.support.milvus.MetricType.INDEX_IMAGE)){ metricType = MetricType.L2; } else if(query.getMetricType().equals(com.walker.support.milvus.MetricType.INDEX_NLP)){ metricType = MetricType.IP; } else { throw new UnsupportedOperationException("暂未支持的距离类型:" + query.getMetricType()); } SearchParam searchParam = SearchParam.newBuilder() @@ -287,7 +293,7 @@ if(outField.equals("id")){ outData.setKeyList((List<Long>)wrapperSearch.getFieldData("id", 0)); } else { outData.setBusinessIdList((List<Long>)wrapperSearch.getFieldData(outField, 0)); outData.setBusinessIdList((List<String>)wrapperSearch.getFieldData(outField, 0)); } } // System.out.println(wrapperSearch.getFieldData("book_id", 0)); walker-support-milvus/src/main/java/com/walker/support/milvus/util/FieldTypeUtils.java
@@ -19,7 +19,10 @@ public static DataType toMilvusDataType(com.walker.support.milvus.DataType dt){ if(dt == com.walker.support.milvus.DataType.String){ return DataType.String; // return DataType.String; return DataType.VarChar; } else if(dt == com.walker.support.milvus.DataType.VarChar){ return DataType.VarChar; } else if(dt == com.walker.support.milvus.DataType.Float){ return DataType.Float; } else if(dt == com.walker.support.milvus.DataType.Double){ walker-support-milvus/src/test/java/com/walker/support/milvus/MilvusEngine.java
// New file @@ -0,0 +1,108 @@
package com.walker.support.milvus;

import com.walker.support.milvus.engine.DefaultOperateService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Test helper that wraps an {@link OperateService} connection and exposes the
 * operations needed to exercise the {@code chat_similar} collection: create it,
 * drop it, insert one mock row and run a vector search.
 */
public class MilvusEngine {

    /** Name of the collection every method of this helper works on. */
    private static final String CHAT_SIMILAR_TABLE = "chat_similar";

    /** Name of the vector field that is indexed and searched. */
    private static final String VECTOR_FIELD = "embedding";

    /** Vector dimension used for the table and the vector field; determined by the embedding model in use. */
    private static final int VECTOR_DIMENSION = 768;

    /** Sample component values that the mock vector is built from. */
    private static final float[] MOCK_VECTOR_SEED = {-0.051114246249198914f, 0.889954432f};

    protected final transient Logger logger = LoggerFactory.getLogger(this.getClass());

    private final OperateService operateService;

    /**
     * Connects to the Milvus server and keeps the connection for later calls.
     *
     * @param ip   host of the Milvus server
     * @param port port of the Milvus server
     */
    public MilvusEngine(String ip, int port) {
        DefaultOperateService service = new DefaultOperateService();
        service.connect(ip, port);
        this.operateService = service;
        logger.info("connect milvus: {}:{}", ip, port);
    }

    /** Closes the underlying service connection. */
    public void close() {
        if (this.operateService != null) {
            this.operateService.close();
        }
    }

    /**
     * Creates the table used to test one-click extraction of work-order content from chats.
     * <pre>
     * 1) Collect the questions users asked in historical work orders and organise them into the table.
     * 2) Convert that data to vectors and write it into the milvus database.
     * </pre>
     * After the table is created, an index is built on the vector field.
     *
     * @date 2024-03-28
     */
    public void createChatSimilarTable() {
        Table chatSimilarTable = new Table();
        chatSimilarTable.setCollectionName(CHAT_SIMILAR_TABLE);
        chatSimilarTable.setDescription("聊天提取工单摘要历史数据");
        chatSimilarTable.setShardsNum(1);
        chatSimilarTable.setDimension(VECTOR_DIMENSION);

        // Field definitions.
        List<FieldType> fieldTypeList = new ArrayList<>(8);
        fieldTypeList.add(FieldType.newBuilder()
                .withName("id").withPrimaryKey(true).withMaxLength(18).withDataType(DataType.Long).build());
        fieldTypeList.add(varCharField("title", 180));
        fieldTypeList.add(varCharField("content", 255));
        fieldTypeList.add(varCharField("answer", 255));
        fieldTypeList.add(FieldType.newBuilder()
                .withName(VECTOR_FIELD).withPrimaryKey(false).withDataType(DataType.FloatVector)
                .withDimension(VECTOR_DIMENSION).build());
        chatSimilarTable.setFieldTypes(fieldTypeList);

        this.operateService.createTable(chatSimilarTable);
        logger.info("创建了 table = {}", chatSimilarTable.getCollectionName());

        // Build the index on the vector field.
        // NOTE(review): "nlist" is usually an IVF-family parameter; confirm it is intended for an HNSW index.
        this.operateService.createIndex(chatSimilarTable.getCollectionName(),
                VECTOR_FIELD, "HNSW", "{\"nlist\":16384, \"efConstruction\":128, \"M\":8}", MetricType.NLP);
        logger.info("创建了 index = {}", chatSimilarTable.getCollectionName() + "_index");
    }

    /** Drops the {@code chat_similar} table. */
    public void dropChatSimilarTable() {
        this.operateService.dropTable(CHAT_SIMILAR_TABLE);
    }

    /**
     * Inserts a single mock row into the {@code chat_similar} table.
     * Each map entry holds the values of one column, one element per row.
     */
    public void insertTestData() {
        DataSet dataSet = new DataSet();
        dataSet.setTableName(CHAT_SIMILAR_TABLE);

        Map<String, List<?>> fieldMap = new HashMap<>();
        fieldMap.put("id", Arrays.asList(100L));
        fieldMap.put("title", Arrays.asList("第一个标题"));
        fieldMap.put("content", Arrays.asList("第一个内容"));
        fieldMap.put("answer", Arrays.asList("第一个答案"));

        // One row whose vector has VECTOR_DIMENSION components, in the same
        // List<List<Float>> form that searchChatSimilar receives.
        // NOTE(review): confirm insertDataSet expects each vector row as List<Float>.
        List<List<Float>> embeddings = new ArrayList<>(1);
        embeddings.add(buildMockVector());
        fieldMap.put(VECTOR_FIELD, embeddings);

        dataSet.setFields(fieldMap);
        this.operateService.insertDataSet(dataSet);
        logger.info("写入了测试数据: {}", dataSet);
    }

    /**
     * Searches the {@code chat_similar} table for the rows closest to the given vectors.
     *
     * @param vectors query vectors, one inner list per vector
     * @return the result produced by {@link OperateService#searchVector}
     */
    public OutData searchChatSimilar(List<List<Float>> vectors) {
        Query query = new Query();
        query.setMetricType(MetricType.NLP.getIndex());
        query.setTableName(CHAT_SIMILAR_TABLE);
        query.setTopK(4);
        query.setVectorName(VECTOR_FIELD);
        query.setOutFieldList(Arrays.asList("id", "title", "content"));
        query.setFieldPrimaryKey("id");
        query.setSearchVectors(vectors);
        return this.operateService.searchVector(query);
    }

    /** Builds a non-primary VarChar field with the given name and maximum length. */
    private static FieldType varCharField(String name, int maxLength) {
        return FieldType.newBuilder()
                .withName(name).withPrimaryKey(false).withMaxLength(maxLength)
                .withDataType(DataType.VarChar).build();
    }

    /** Builds a mock vector of VECTOR_DIMENSION components by repeating the seed values. */
    private static List<Float> buildMockVector() {
        List<Float> vector = new ArrayList<>(VECTOR_DIMENSION);
        for (int i = 0; i < VECTOR_DIMENSION; i++) {
            vector.add(MOCK_VECTOR_SEED[i % MOCK_VECTOR_SEED.length]);
        }
        return vector;
    }
}
// walker-support-milvus/src/test/java/com/walker/support/milvus/TestMilvus.java
New file @@ -0,0 +1,45 @@ package com.walker.support.milvus; import org.junit.Test; public class TestMilvus { public void testSearchMilvus(){ } // @Test public void testFloatParse(){ double d = 0.013319502584636211; float f = Float.parseFloat(String.valueOf(d)); System.out.println("f = " + f); } /** * 创建聊天中提取工单摘要功能,milvus测试表创建。 * <p>第一步:创建表和索引</p> */ // @Test public void createTable(){ this.acquireMilvusEngine(); this.milvusEngine.createChatSimilarTable(); } /** * 测试:写入一个模拟数据 */ // @Test public void insertTestMockData(){ this.acquireMilvusEngine(); this.milvusEngine.insertTestData(); } private void acquireMilvusEngine(){ if(this.milvusEngine == null){ MilvusEngine engine = new MilvusEngine("120.26.128.84", 19530); this.milvusEngine = engine; } } private MilvusEngine milvusEngine; } walker-web/pom.xml
@@ -14,6 +14,7 @@ <packaging>jar</packaging> <properties> <ip2region.version>2.7.0</ip2region.version> </properties> <dependencies> @@ -55,6 +56,19 @@ <artifactId>okhttp</artifactId> </dependency> <!-- IP定位,一个开源ip与地址对应的本地数据库xdb,2024-04-02 --> <dependency> <groupId>org.lionsoul</groupId> <artifactId>ip2region</artifactId> <version>${ip2region.version}</version> <exclusions> <exclusion> <groupId>junit</groupId> <artifactId>junit</artifactId> </exclusion> </exclusions> </dependency> </dependencies> </project> walker-web/src/main/java/com/walker/web/agent/BrowserCapWebAgentService.java
@@ -3,6 +3,7 @@ import com.walker.web.WebAgentService; import com.walker.web.WebUserAgent; import com.walker.web.util.IpUtils; import org.lionsoul.ip2region.xdb.Searcher; import javax.servlet.http.HttpServletRequest; import java.util.Arrays; @@ -15,6 +16,9 @@ private Capabilities capabilities = null; private UserAgentParser parser = null; // 搜索地区的本地对象(读取本地xdb数据库),2024-04-02 private Searcher searcher = null; public BrowserCapWebAgentService(){ try { @@ -30,6 +34,11 @@ } catch (Exception e) { throw new RuntimeException("创建浏览器解析对象错误:" + e.getMessage(), e); } // this.searcher = Searcher.newWithFileOnly(); // ClassPathResource resource = new ClassPathResource(); // resource.getFile(); // RandomAccessFile randomAccessFile = new RandomAccessFile(); } @Override @@ -59,6 +68,7 @@ return webUserAgent; } /** * 设置是否加载ip对应的区域地质,默认:false不加载,因为需要连外网查找。 * @param loadLocation @@ -68,5 +78,14 @@ this.loadLocation = loadLocation; } /** * 设置:ip2region数据库文件路径,如: * @param xdbPath */ public void setXdbPath(String xdbPath) { this.xdbPath = xdbPath; } private String xdbPath; private boolean loadLocation = false; } walker-web/src/main/java/com/walker/web/util/IpUtils.java
@@ -32,37 +32,69 @@ * * @param request 请求对象 * @return IP地址 * @date 2023-10-21 * @date 2024-04-02 增加获取请求头中参数(补充) */ public static String getIpAddr(HttpServletRequest request) { if (request == null) { public static String getIpAddr(HttpServletRequest request) { if (request == null) { return "unknown"; } String ip = request.getHeader("x-forwarded-for"); if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("Proxy-Client-IP"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { String ip = null; // 以下两个获取在k8s中,将真实的客户端IP,放到了x-Original-Forwarded-For。而将WAF的回源地址放到了 x-Forwarded-For了。 ip = request.getHeader("X-Original-Forwarded-For"); if (ip == null || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("X-Forwarded-For"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { //获取nginx等代理的ip if (ip == null || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("x-forwarded-for"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("Proxy-Client-IP"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("WL-Proxy-Client-IP"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { if (ip == null || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("HTTP_CLIENT_IP"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("X-Real-IP"); } if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getRemoteAddr(); if (ip == null || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getHeader("HTTP_X_FORWARDED_FOR"); } return "0:0:0:0:0:0:0:1".equals(ip) ? 
LOCALHOST : getMultistageReverseProxyIp(ip); // 2.如果没有转发的ip,则取当前通信的请求端的ip(兼容k8s集群获取ip) if (ip == null || ip.length() == 0 || UNKNOWN_TEXT.equalsIgnoreCase(ip)) { ip = request.getRemoteAddr(); // 如果是127.0.0.1,则取本地真实ip if (LOCALHOST.equals(ip)) { // 根据网卡取本机配置的IP InetAddress inet = null; try { inet = InetAddress.getLocalHost(); ip = inet.getHostAddress(); } catch (UnknownHostException e) { e.printStackTrace(); } } } // 对于通过多个代理的情况,第一个IP为客户端真实IP,多个IP按照','分割 if (ip != null && ip.length() > 15) { // = 15 if (ip.indexOf(StringUtils.DEFAULT_SPLIT_SEPARATOR) > 0) { ip = ip.substring(0, ip.indexOf(StringUtils.DEFAULT_SPLIT_SEPARATOR)); } } // return "0:0:0:0:0:0:0:1".equals(ip) ? LOCALHOST : getMultistageReverseProxyIp(ip); return "0:0:0:0:0:0:0:1".equals(ip) ? LOCALHOST : ip; } /** walker-web/src/main/resources/ip2region.xdbBinary files differ