shikeying
2024-04-03 b77abcbc0f17070a2a970e0c4aa5837e90f28e1f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package com.walker.support.milvus;
 
import com.walker.support.milvus.engine.DefaultOperateService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
 
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
public class MilvusEngine {
 
    protected final transient Logger logger = LoggerFactory.getLogger(this.getClass());
 
    public MilvusEngine(String ip, int port){
        DefaultOperateService service = new DefaultOperateService();
        service.connect(ip, port);
        this.operateService = service;
        logger.info("connect milvus: {}:{}", ip, port);
    }
 
    public void close(){
        if(this.operateService != null){
            this.operateService.close();
        }
    }
 
    /**
     * 创建表:测试从聊天一键提取工单内容使用。
     * <pre>
     *     1) 从历史工单数据中,收集用户提问内容,整理到表中
     *     2) 把这些数据通过向量转化,写入milvus数据库。
     * </pre>
     * @date 2024-03-28
     */
    public void createChatSimilarTable(){
        Table chatSimilarTable = new Table();
        chatSimilarTable.setCollectionName("chat_similar");
        chatSimilarTable.setDescription("聊天提取工单摘要历史数据");
        chatSimilarTable.setShardsNum(1);
        chatSimilarTable.setDimension(768); // 这个是根据使用向量模型维度定的
 
        // 设置字段
        FieldType id = FieldType.newBuilder()
                .withName("id").withPrimaryKey(true).withMaxLength(18).withDataType(DataType.Long).build();
        FieldType title = FieldType.newBuilder()
                .withName("title").withPrimaryKey(false).withMaxLength(180).withDataType(DataType.VarChar).build();
        FieldType content = FieldType.newBuilder()
                .withName("content").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build();
        FieldType answer = FieldType.newBuilder()
                .withName("answer").withPrimaryKey(false).withMaxLength(255).withDataType(DataType.VarChar).build();
        FieldType embedding = FieldType.newBuilder()
                .withName("embedding").withPrimaryKey(false).withDataType(DataType.FloatVector).withDimension(768).build();
 
        List<FieldType> fieldTypeList = new ArrayList<>(8);
        fieldTypeList.add(id);
        fieldTypeList.add(title);
        fieldTypeList.add(content);
        fieldTypeList.add(answer);
        fieldTypeList.add(embedding);
        chatSimilarTable.setFieldTypes(fieldTypeList);
 
        this.operateService.createTable(chatSimilarTable);
        logger.info("创建了 table = {}", chatSimilarTable.getCollectionName());
 
        // 创建索引
        this.operateService.createIndex(chatSimilarTable.getCollectionName()
                , "embedding", "HNSW", "{\"nlist\":16384, \"efConstruction\":128, \"M\":8}", MetricType.NLP);
        logger.info("创建了 index = {}", chatSimilarTable.getCollectionName() + "_index");
    }
 
    public void dropChatSimilarTable(){
        this.operateService.dropTable("chat_similar");
    }
 
    public void insertTestData(){
        DataSet dataSet = new DataSet();
        dataSet.setTableName("chat_similar");
 
        Map<String, List<?>> fieldMap = new HashMap();
        fieldMap.put("id", Arrays.asList(new Long[]{100L}));
        fieldMap.put("title", Arrays.asList(new String[]{"第一个标题"}));
        fieldMap.put("content", Arrays.asList(new String[]{"第一个内容"}));
        fieldMap.put("answer", Arrays.asList(new String[]{"第一个答案"}));
        fieldMap.put("embedding", Arrays.asList(mockVector));
        dataSet.setFields(fieldMap);
        this.operateService.insertDataSet(dataSet);
        logger.info("写入了测试数据: {}", dataSet);
    }
 
    public OutData searchChatSimilar(List<List<Float>> vectors){
        Query query = new Query();
        query.setMetricType(MetricType.NLP.getIndex());
        query.setTableName("chat_similar");
        query.setTopK(4);
        query.setVectorName("embedding");
        query.setOutFieldList(Arrays.asList(new String[]{"id","title","content"}));
        query.setFieldPrimaryKey("id");
        query.setSearchVectors(vectors);
        return this.operateService.searchVector(query);
    }
 
    private OperateService operateService;
 
    private double[] mockVector = new double[]{-0.051114246249198914, 0.889954432};
}