package com.walker.support.milvus;
|
|
import com.walker.support.milvus.engine.DefaultOperateService;
|
import org.slf4j.Logger;
|
import org.slf4j.LoggerFactory;
|
|
import java.util.ArrayList;
|
import java.util.Arrays;
|
import java.util.HashMap;
|
import java.util.List;
|
import java.util.Map;
|
|
public class MilvusEngine {
|
|
protected final transient Logger logger = LoggerFactory.getLogger(this.getClass());
|
|
public MilvusEngine(String ip, int port){
|
DefaultOperateService service = new DefaultOperateService();
|
service.connect(ip, port);
|
this.operateService = service;
|
logger.info("connect milvus: {}:{}", ip, port);
|
}
|
|
public void close(){
|
if(this.operateService != null){
|
this.operateService.close();
|
}
|
}
|
|
/**
|
* 创建表:测试从聊天一键提取工单内容使用。
|
* <pre>
|
* 1) 从历史工单数据中,收集用户提问内容,整理到表中
|
* 2) 把这些数据通过向量转化,写入milvus数据库。
|
* </pre>
|
* @date 2024-03-28
|
*/
|
public void createChatSimilarTable(){
|
Table chatSimilarTable = new Table();
|
chatSimilarTable.setCollectionName("chat_similar");
|
chatSimilarTable.setDescription("聊天提取工单摘要历史数据");
|
chatSimilarTable.setShardsNum(1);
|
chatSimilarTable.setDimension(768); // 这个是根据使用向量模型维度定的
|
|
// 设置字段
|
FieldType id = FieldType.newBuilder()
|
.withName("id").withPrimaryKey(true).withMaxLength(18).withDataType(DataType.Long).build();
|
FieldType title = FieldType.newBuilder()
|
.withName("title").withPrimaryKey(false).withMaxLength(180).withDataType(DataType.VarChar).build();
|
FieldType content = FieldType.newBuilder()
|
.withName("content").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build();
|
FieldType answer = FieldType.newBuilder()
|
.withName("answer").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build();
|
FieldType embedding = FieldType.newBuilder()
|
.withName("embedding").withPrimaryKey(false).withDataType(DataType.FloatVector).withDimension(768).build();
|
|
List<FieldType> fieldTypeList = new ArrayList<>(8);
|
fieldTypeList.add(id);
|
fieldTypeList.add(title);
|
fieldTypeList.add(content);
|
fieldTypeList.add(answer);
|
fieldTypeList.add(embedding);
|
chatSimilarTable.setFieldTypes(fieldTypeList);
|
|
this.operateService.createTable(chatSimilarTable);
|
logger.info("创建了 table = {}", chatSimilarTable.getCollectionName());
|
|
// 创建索引
|
this.operateService.createIndex(chatSimilarTable.getCollectionName()
|
, "embedding", "HNSW", "{\"nlist\":16384, \"efConstruction\":128, \"M\":8}", MetricType.NLP);
|
logger.info("创建了 index = {}", chatSimilarTable.getCollectionName() + "_index");
|
}
|
|
public void dropChatSimilarTable(){
|
this.operateService.dropTable("chat_similar");
|
}
|
|
public void insertTestData(){
|
DataSet dataSet = new DataSet();
|
dataSet.setTableName("chat_similar");
|
|
Map<String, List<?>> fieldMap = new HashMap();
|
fieldMap.put("id", Arrays.asList(new Long[]{100L}));
|
fieldMap.put("title", Arrays.asList(new String[]{"第一个标题"}));
|
fieldMap.put("content", Arrays.asList(new String[]{"第一个内容"}));
|
fieldMap.put("answer", Arrays.asList(new String[]{"第一个答案"}));
|
fieldMap.put("embedding", Arrays.asList(mockVector));
|
dataSet.setFields(fieldMap);
|
this.operateService.insertDataSet(dataSet);
|
logger.info("写入了测试数据: {}", dataSet);
|
}
|
|
public OutData searchChatSimilar(List<List<Float>> vectors){
|
Query query = new Query();
|
query.setMetricType(MetricType.NLP.getIndex());
|
query.setTableName("chat_similar");
|
query.setTopK(4);
|
query.setVectorName("embedding");
|
query.setOutFieldList(Arrays.asList(new String[]{"id","title","content"}));
|
query.setFieldPrimaryKey("id");
|
query.setSearchVectors(vectors);
|
return this.operateService.searchVector(query);
|
}
|
|
private OperateService operateService;
|
|
private double[] mockVector = new double[]{-0.051114246249198914, 0.889954432};
|
}
|