shikeying
2024-01-11 3b67e947e36133e2a40eb2737b15ea375e157ea0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
package com.walker.openocr.table;
 
import com.walker.openocr.AbstractTextResolver;
import com.walker.openocr.OcrType;
import com.walker.openocr.TextBlock;
import com.walker.openocr.util.TableObjectUtils;
 
import java.util.ArrayList;
import java.util.List;
 
public class TableTextResolver extends AbstractTextResolver<TableObject, TableConfig> {
 
    public TableTextResolver(){
        this.setOcrType(OcrType.TextTable);
    }
 
    @Override
    protected TableObject doResolveGeneric(List<TextBlock> dataList, List<TableConfig> configList) {
        throw new UnsupportedOperationException("不支持该方法");
    }
 
    @Override
    protected TableObject doResolveIdCard(List<TextBlock> dataList, List<TableConfig> configList) {
        throw new UnsupportedOperationException("不支持该方法");
    }
 
    @Override
    protected TableObject doResolveTable(List<TextBlock> dataList, List<TableConfig> configList) {
        if(configList == null || configList.size() == 0){
            logger.error("table configList 未设置,无法解析表格对象");
            return null;
        }
 
        TableConfig tableConfig = this.getTableType(dataList, configList);
        if(tableConfig == null){
//            logger.error("未找到:tableConfig,无法继续解析表格");
//            return null;
            logger.error("未找到:tableConfig,尝试使用最后一个作为兜底模板配置");
            tableConfig = configList.get(configList.size()-1);
        }
 
        // 要返回的表格对象。
        TableObject tableObject = new TableObject(tableConfig);
 
//        List<TextBlock> availableList = this.filterRemovedNames(tableConfig, dataList);
//        if(availableList != null){
//            for(TextBlock tb : availableList){
//                tableObject.addTextBlock(tb);
//            }
//        }
//        tableObject.sortCellObjectList();
//        tableObject.calculateValue();
//        tableObject.printRowCache();
        this.flowTableObject(tableObject, tableConfig, dataList);
 
        return tableObject;
    }
 
    /**
     * 把处理流程组合到一起调用计算并填充表格结果对象。
     * @param tableObject
     * @param tableConfig
     * @param dataList
     * @date 2022-09-30
     */
    protected void flowTableObject(TableObject tableObject, TableConfig tableConfig, List<TextBlock> dataList){
        List<TextBlock> availableList = this.filterRemovedNames(tableConfig, dataList);
        if(availableList != null){
            for(TextBlock tb : availableList){
//                logger.debug(tb.toString());
                tableObject.addTextBlock(tb);
            }
        }
        tableObject.sortCellObjectList();
        tableObject.calculateValue();
        tableObject.printRowCache();
    }
 
    private List<TextBlock> filterRemovedNames(TableConfig tableConfig, List<TextBlock> dataList){
        String[] removeNames = tableConfig.getRemoveColumnsName();
        if(removeNames == null || removeNames.length == 0){
            logger.info("tableConfig:未配置移除关键词");
            return dataList;
        }
 
        List<TextBlock> availableList = new ArrayList<>();
        for(TextBlock textBlock : dataList){
            boolean exist = false;
            for(String name : removeNames){
                if(textBlock.getText().equals(name)){
                    exist = true;
                    break;
                }
            }
            if(!exist){
                availableList.add(textBlock);
            } else {
                logger.debug("移除数据: " + textBlock.getText());
            }
        }
        return availableList;
    }
 
    private TableConfig getTableType(List<TextBlock> dataList, List<TableConfig> configList){
        List<String[]> tableKeysList = null;
        for(TableConfig tableConfig : configList){
            // 2023-11-01 先检查是否存在不应该包含的:标题关键词
            if(tableConfig.getTableTitleNotKey().size() > 0){
//                boolean notKeyMatch = false;
//                for(TextBlock textBlock : dataList){
//                    if(tableConfig.getTableTitleNotKey().contains(textBlock.getText())){
//                        notKeyMatch = true;
//                        break;
//                    }
//                }
//                if(notKeyMatch){
//                    continue;
//                }
                if(TableObjectUtils.containTableKeyNot(dataList, tableConfig.getTableTitleNotKey())){
                    logger.debug("继续下一个,标题包含了排除关键词:{}", tableConfig.getTableTitleNotKey());
                    continue;
                }
            }
 
            tableKeysList = tableConfig.getTableTypeKeys();
            if(tableKeysList == null || tableKeysList.size() == 0){
                throw new IllegalArgumentException("TableConfig中未配置:tableTypeKeys属性, table=" + tableConfig.getTableTypeKeys());
            }
            if(tableKeysList.size() > 1){
                throw new UnsupportedOperationException("暂时没实现多组关键词并列代码,请使用单组,如:大地,保险,机动车");
            }
            int successSize = 0;
            for(String s : tableKeysList.get(0)){
                for(TextBlock textBlock : dataList){
                    if(textBlock.getText().contains(s)){
                        successSize ++;
                        break;
                    }
                }
            }
            if(successSize > 0 && successSize == tableKeysList.get(0).length){
                logger.info("搜索到表格类型:" + tableConfig);
                return tableConfig;
            }
        }
        return null;
    }
}