shikeying
2024-01-11 3b67e947e36133e2a40eb2737b15ea375e157ea0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
package com.walker.openocr.idcard;
 
import com.walker.openocr.RecognizeResult;
import com.walker.openocr.TextBlock;
import com.walker.openocr.table.CellObject;
import com.walker.openocr.util.TableObjectUtils;
import com.walker.openocr.util.TextUtils;
 
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
 
public class IdCardObject extends RecognizeResult<IdCardConfig> {
 
    private List<CellObject> otherCellObjectList = new ArrayList<>();
 
//    private Map<Integer, List<CellObject>> rowCache = new TreeMap<>();
 
    private IdCardConfig idCardConfig = null;
 
    // 身份证正面、背面值定义:0正面,1背面
    private int cardSide = 1;
 
    private String name;
    private String sex;
    private String nation;
    private String birthday;
    private String address;
    private String cardNo;
 
    private String signOffice;
    private String dateStart;
    private String dateEnd;
 
    public String getName() {
        return name;
    }
 
    public String getSex() {
        return sex;
    }
 
    public String getNation() {
        return nation;
    }
 
    public String getBirthday() {
        return birthday;
    }
 
    public String getAddress() {
        return address;
    }
 
    public String getCardNo() {
        return cardNo;
    }
 
    public String getSignOffice() {
        return signOffice;
    }
 
    public String getDateStart() {
        return dateStart;
    }
 
    public String getDateEnd() {
        return dateEnd;
    }
 
    public IdCardObject(IdCardConfig idCardConfig){
        if(idCardConfig == null){
            this.idCardConfig = new IdCardConfig();
        } else {
            this.idCardConfig = idCardConfig;
        }
    }
 
    @Override
    public void calculateValue(){
        this.recognizeCardSide();
        if(this.cardSide == 0){
            // 正面
            if(this.rowCache.size() < 5){
                logger.warn("身份证正面解析到的行数过少(小于5行),只能根据内容关键词解析!");
                this.parseCardFrontInfo();
            } else {
                logger.info("正确识别到身份证正面行数据,依据行解析");
                this.parseCardFrontInfo();
            }
        } else {
            // 反面
            throw new UnsupportedOperationException("暂未实现反面解析代码");
        }
    }
 
    @Override
    public IdCardConfig getRecognizeConfig() {
        return this.idCardConfig;
    }
 
    private void parseCardFrontInfo(){
        if(this.rowCache.size() > 6){
            logger.error("身份证正面解析到行数太多,大于6行,请检查是否图片有误,只解析前6行数据!");
        }
        if(this.rowCache.size() >= 6){
            // 说明地址换行,拼接到第四行地址中
            CellObject addressTwoLineCell = this.rowCache.get(5).get(0);
            // 这里列序号排大一些,为拼合地址行排序准备
            addressTwoLineCell.setOrderColumn(9);
            this.rowCache.get(4).add(addressTwoLineCell);
            List<CellObject> addressRowList = this.rowCache.get(4);
            TableObjectUtils.sortColumnCellList(addressRowList);
            // 第五行的数据删除(地址第二行)
            this.rowCache.remove(5);
            // 把第6行身份证号数据放到第5行,这样我们只解析前5行
            this.rowCache.put(5, this.rowCache.get(6));
        }
 
        int realRows = this.rowCache.size();
 
        // 第1行:姓名
        // 第2行:性别、民族
        // 第3行:出生日期
        // 第4行:住址
        // 第5行:身份证号
        List<CellObject> data = null;
        for(int rowNum=1; rowNum<6; rowNum++){
            if(rowNum > realRows){
                logger.debug("超过最大行,不在解析。rowNum=" + rowNum + ", realRows=" + realRows);
                continue;
            }
            data = this.rowCache.get(rowNum);
            if(rowNum == 1){
                this.name = this.parseOneField(data, IdCardConfig.VALUE_NAME, rowNum);
 
            } else if(rowNum == 3){
                this.birthday = this.parseOneField(data, IdCardConfig.VALUE_BIRTHDAY, rowNum);
 
            } else if(rowNum == 5){
                this.cardNo = this.parseOneField(data, IdCardConfig.VALUE_NUMBER, rowNum);
 
            } else if(rowNum == 2){
                String[] sexNation = this.parseSexNation(data);
                if(sexNation == null || sexNation.length != 2){
                    logger.error("解析性别、民族错误,返回数据缺失:" + data);
                    continue;
                }
                this.sex = sexNation[0];
                this.nation = sexNation[1];
 
            } else if(rowNum == 4){
                this.address = this.parseAddress(data);
            }
        }
    }
 
    /**
     * 返回解析过的结果:性别和民族
     * @param data
     * @return
     */
    private String[] parseSexNation(List<CellObject> data){
        String[] result = new String[2];
        // 直接拼合成一行处理
        StringBuilder sb = new StringBuilder();
        for(CellObject co : data){
            sb.append(co.getSource().getText().trim());
        }
        String value = sb.toString();
        value = TextUtils.removeKeys(value, IdCardConfig.VALUE_SEX);
        value = TextUtils.removeKeys(value, IdCardConfig.VALUE_NATION);
        if(value.indexOf(IdCardConfig.SEX_MALE) >= 0){
            result[0] = IdCardConfig.SEX_MALE;
        } else if(value.indexOf(IdCardConfig.SEX_FEMALE) >= 0){
            result[0] = IdCardConfig.SEX_FEMALE;
        } else {
            logger.warn("未解析到性别:" + value);
            result[0] = TextUtils.EMPTY_VALUE;
        }
        value = TextUtils.removeKeys(value, "男女");
        result[1] = value;
        return result;
    }
 
    private String parseAddress(List<CellObject> data){
        // 把"住址"这个关键词对象去掉
        String value = null;
        CellObject temp = null;
        for(Iterator<CellObject> it = data.iterator(); it.hasNext();){
            temp = it.next();
            value = TextUtils.removeKeys(temp.getSource().getText(), IdCardConfig.VALUE_ADDRESS);
            if(value != null && value.equals(TextUtils.EMPTY_VALUE)){
                logger.debug("移除'住址'单元格:" + value);
                it.remove();
                break;
            }
        }
        logger.debug("住址单元格数量:" + data.size());
        value = "";
        for(CellObject co : data){
            value += co.getSource().getText();
        }
//        if(data.size() == 3){
//            value = data.get(1).getSource().getText() + data.get(2).getSource().getText();
//        } else if(data.size() == 2){
//            value = data.get(0).getSource().getText() + data.get(1).getSource().getText();
//        } else if(data.size() == 1){
//            value = data.get(0).getSource().getText();
//        } else {
//            logger.error("身份证地址信息没数据");
//            return null;
//        }
//        return TextUtils.removeKeys(value, key);
        return value;
    }
 
    private String parseOneField(List<CellObject> data, String key, int rowNum){
        String value = null;
//        if(!isAddress){
            if(data.size() == 2){
                // 存在两个元素,
                if(data.get(0).getSource().getText().equals(key)){
                    value = data.get(1).getSource().getText();
                    logger.debug("匹配到关键字段:" + key + ", value=" + value);
                    return value;
                }
                value = data.get(1).getSource().getText();
                return value;
 
            } else if(data.size() == 1){
                // 只有一个元素,需要依据关键词截取
                value = data.get(0).getSource().getText();
//                String[] keys = key.split("");  // 拆分为每个字替换为空
//                for(String k : keys){
//                    value.replaceFirst(k, "");
//                }
                return TextUtils.removeKeys(value, key);
            } else {
                logger.error("行中没有任何数据,row=" + rowNum);
                return null;
            }
//        }
    }
 
    @Override
    public void sortCellObjectList(){
        CellObject target = null;
        int currentRow = 1;
        for(Iterator<CellObject> it = this.otherCellObjectList.iterator(); it.hasNext();){
            target = it.next();
            List<CellObject> rowSet = null;
            boolean sameRow = false;
 
            // 如果rowCache中没有,则直接放入第一个单元格,行号最后在统一排列
            if(rowCache.size() == 0){
//                rowSet = new ArrayList<>(4);
//                rowSet.add(target);
//                this.rowCache.put(currentRow, rowSet);
                this.createNewRow(target, currentRow);
                currentRow ++;
                continue;
            }
 
            // 检测已存在行数据中,是否有相同行的单元格。如果没有则要新创建行
 
            for(Map.Entry<Integer, List<CellObject>> entry : this.rowCache.entrySet()){
                rowSet = entry.getValue();
                sameRow = TableObjectUtils.isInSameRow(rowSet.get(0), target, idCardConfig.getCellTolerance());
                if(sameRow){
                    logger.debug("找到匹配行:" + entry.getKey() + ", target=" + target.getSource().getText());
                    rowSet.add(target);
                    // 从other集合移除该单元格
                    it.remove();
                    // 退出该循环,从下一个目标元素继续执行
                    break;
                }
            }
 
            // 如果现有行数据中不存在,则创建新行
            if(!sameRow){
                this.createNewRow(target, currentRow);
                currentRow ++;
                continue;
           }
        }
 
        // 每行对列排序
        for(List<CellObject> list : this.rowCache.values()){
            TableObjectUtils.sortColumnCellList(list);
        }
    }
 
    @Override
    public void addTextBlock(TextBlock textBlock){
        CellObject cellObject = new CellObject();
        cellObject.setSource(textBlock);
        cellObject.setId(this.generateCellId());
        this.otherCellObjectList.add(cellObject);
    }
 
    /**
     * 识别正面还是反面
     */
    private void recognizeCardSide(){
//        boolean success = false;
        List<String> idcardFrontKeys = this.idCardConfig.getIdCardFrontKeys();
        for(List<CellObject> list : this.rowCache.values()){
            for(CellObject co : list){
                for(String key : idcardFrontKeys){
                    if(co.getSource().getText().indexOf(key) >= 0){
                        logger.debug("找到身份证(正面)关键词:" + key);
                        this.cardSide = 0;
//                        success = true;
                        break;
                    }
                }
            }
        }
    }
 
    private void createNewRow(CellObject target, int rowNumber){
        List<CellObject> rowSet = new ArrayList<>(4);
        rowSet.add(target);
        this.rowCache.put(rowNumber, rowSet);
    }
 
    private String generateCellId(){
        return String.valueOf(System.nanoTime());
    }
 
    @Override
    public String toString(){
        return new StringBuilder("[name=").append(this.name)
                .append(", sex=").append(this.sex)
                .append(", nation=").append(this.nation)
                .append(", birthday=").append(this.birthday)
                .append(", address=").append(this.address)
                .append(", no=").append(this.cardNo)
                .append(", side=").append(this.cardSide)
                .append("]").toString();
    }
}