package com.walker.support.milvus; import com.walker.support.milvus.engine.DefaultOperateService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; public class MilvusEngine { protected final transient Logger logger = LoggerFactory.getLogger(this.getClass()); public MilvusEngine(String ip, int port){ DefaultOperateService service = new DefaultOperateService(); service.connect(ip, port); this.operateService = service; logger.info("connect milvus: {}:{}", ip, port); } public void close(){ if(this.operateService != null){ this.operateService.close(); } } /** * 创建表:测试从聊天一键提取工单内容使用。 *
     *     1) 从历史工单数据中,收集用户提问内容,整理到表中
     *     2) 把这些数据通过向量转化,写入milvus数据库。
     * 
* @date 2024-03-28 */ public void createChatSimilarTable(){ Table chatSimilarTable = new Table(); chatSimilarTable.setCollectionName("chat_similar"); chatSimilarTable.setDescription("聊天提取工单摘要历史数据"); chatSimilarTable.setShardsNum(1); chatSimilarTable.setDimension(768); // 这个是根据使用向量模型维度定的 // 设置字段 FieldType id = FieldType.newBuilder() .withName("id").withPrimaryKey(true).withMaxLength(18).withDataType(DataType.Long).build(); FieldType title = FieldType.newBuilder() .withName("title").withPrimaryKey(false).withMaxLength(180).withDataType(DataType.VarChar).build(); FieldType content = FieldType.newBuilder() .withName("content").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build(); FieldType answer = FieldType.newBuilder() .withName("answer").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build(); FieldType embedding = FieldType.newBuilder() .withName("embedding").withPrimaryKey(false).withDataType(DataType.FloatVector).withDimension(768).build(); List fieldTypeList = new ArrayList<>(8); fieldTypeList.add(id); fieldTypeList.add(title); fieldTypeList.add(content); fieldTypeList.add(answer); fieldTypeList.add(embedding); chatSimilarTable.setFieldTypes(fieldTypeList); this.operateService.createTable(chatSimilarTable); logger.info("创建了 table = {}", chatSimilarTable.getCollectionName()); // 创建索引 this.operateService.createIndex(chatSimilarTable.getCollectionName() , "embedding", "HNSW", "{\"nlist\":16384, \"efConstruction\":128, \"M\":8}", MetricType.NLP); logger.info("创建了 index = {}", chatSimilarTable.getCollectionName() + "_index"); } public void dropChatSimilarTable(){ this.operateService.dropTable("chat_similar"); } public void insertTestData(){ DataSet dataSet = new DataSet(); dataSet.setTableName("chat_similar"); Map> fieldMap = new HashMap(); fieldMap.put("id", Arrays.asList(new Long[]{100L})); fieldMap.put("title", Arrays.asList(new String[]{"第一个标题"})); fieldMap.put("content", Arrays.asList(new String[]{"第一个内容"})); fieldMap.put("answer", Arrays.asList(new String[]{"第一个答案"})); fieldMap.put("embedding", Arrays.asList(mockVector)); dataSet.setFields(fieldMap); this.operateService.insertDataSet(dataSet); logger.info("写入了测试数据: {}", dataSet); } public OutData searchChatSimilar(List> vectors){ Query query = new Query(); query.setMetricType(MetricType.NLP.getIndex()); query.setTableName("chat_similar"); query.setTopK(4); query.setVectorName("embedding"); query.setOutFieldList(Arrays.asList(new String[]{"id","title","content"})); query.setFieldPrimaryKey("id"); query.setSearchVectors(vectors); return this.operateService.searchVector(query); } private OperateService operateService; private double[] mockVector = new double[]{-0.051114246249198914, 0.889954432}; }