package com.walker.openocr.idcard;
|
|
import com.walker.openocr.RecognizeResult;
|
import com.walker.openocr.TextBlock;
|
import com.walker.openocr.table.CellObject;
|
import com.walker.openocr.util.TableObjectUtils;
|
import com.walker.openocr.util.TextUtils;
|
|
import java.util.ArrayList;
|
import java.util.Iterator;
|
import java.util.List;
|
import java.util.Map;
|
|
public class IdCardObject extends RecognizeResult<IdCardConfig> {
|
|
private List<CellObject> otherCellObjectList = new ArrayList<>();
|
|
// private Map<Integer, List<CellObject>> rowCache = new TreeMap<>();
|
|
private IdCardConfig idCardConfig = null;
|
|
// 身份证正面、背面值定义:0正面,1背面
|
private int cardSide = 1;
|
|
private String name;
|
private String sex;
|
private String nation;
|
private String birthday;
|
private String address;
|
private String cardNo;
|
|
private String signOffice;
|
private String dateStart;
|
private String dateEnd;
|
|
public String getName() {
|
return name;
|
}
|
|
public String getSex() {
|
return sex;
|
}
|
|
public String getNation() {
|
return nation;
|
}
|
|
public String getBirthday() {
|
return birthday;
|
}
|
|
public String getAddress() {
|
return address;
|
}
|
|
public String getCardNo() {
|
return cardNo;
|
}
|
|
public String getSignOffice() {
|
return signOffice;
|
}
|
|
public String getDateStart() {
|
return dateStart;
|
}
|
|
public String getDateEnd() {
|
return dateEnd;
|
}
|
|
public IdCardObject(IdCardConfig idCardConfig){
|
if(idCardConfig == null){
|
this.idCardConfig = new IdCardConfig();
|
} else {
|
this.idCardConfig = idCardConfig;
|
}
|
}
|
|
@Override
|
public void calculateValue(){
|
this.recognizeCardSide();
|
if(this.cardSide == 0){
|
// 正面
|
if(this.rowCache.size() < 5){
|
logger.warn("身份证正面解析到的行数过少(小于5行),只能根据内容关键词解析!");
|
this.parseCardFrontInfo();
|
} else {
|
logger.info("正确识别到身份证正面行数据,依据行解析");
|
this.parseCardFrontInfo();
|
}
|
} else {
|
// 反面
|
throw new UnsupportedOperationException("暂未实现反面解析代码");
|
}
|
}
|
|
@Override
|
public IdCardConfig getRecognizeConfig() {
|
return this.idCardConfig;
|
}
|
|
private void parseCardFrontInfo(){
|
if(this.rowCache.size() > 6){
|
logger.error("身份证正面解析到行数太多,大于6行,请检查是否图片有误,只解析前6行数据!");
|
}
|
if(this.rowCache.size() >= 6){
|
// 说明地址换行,拼接到第四行地址中
|
CellObject addressTwoLineCell = this.rowCache.get(5).get(0);
|
// 这里列序号排大一些,为拼合地址行排序准备
|
addressTwoLineCell.setOrderColumn(9);
|
this.rowCache.get(4).add(addressTwoLineCell);
|
List<CellObject> addressRowList = this.rowCache.get(4);
|
TableObjectUtils.sortColumnCellList(addressRowList);
|
// 第五行的数据删除(地址第二行)
|
this.rowCache.remove(5);
|
// 把第6行身份证号数据放到第5行,这样我们只解析前5行
|
this.rowCache.put(5, this.rowCache.get(6));
|
}
|
|
int realRows = this.rowCache.size();
|
|
// 第1行:姓名
|
// 第2行:性别、民族
|
// 第3行:出生日期
|
// 第4行:住址
|
// 第5行:身份证号
|
List<CellObject> data = null;
|
for(int rowNum=1; rowNum<6; rowNum++){
|
if(rowNum > realRows){
|
logger.debug("超过最大行,不在解析。rowNum=" + rowNum + ", realRows=" + realRows);
|
continue;
|
}
|
data = this.rowCache.get(rowNum);
|
if(rowNum == 1){
|
this.name = this.parseOneField(data, IdCardConfig.VALUE_NAME, rowNum);
|
|
} else if(rowNum == 3){
|
this.birthday = this.parseOneField(data, IdCardConfig.VALUE_BIRTHDAY, rowNum);
|
|
} else if(rowNum == 5){
|
this.cardNo = this.parseOneField(data, IdCardConfig.VALUE_NUMBER, rowNum);
|
|
} else if(rowNum == 2){
|
String[] sexNation = this.parseSexNation(data);
|
if(sexNation == null || sexNation.length != 2){
|
logger.error("解析性别、民族错误,返回数据缺失:" + data);
|
continue;
|
}
|
this.sex = sexNation[0];
|
this.nation = sexNation[1];
|
|
} else if(rowNum == 4){
|
this.address = this.parseAddress(data);
|
}
|
}
|
}
|
|
/**
|
* 返回解析过的结果:性别和民族
|
* @param data
|
* @return
|
*/
|
private String[] parseSexNation(List<CellObject> data){
|
String[] result = new String[2];
|
// 直接拼合成一行处理
|
StringBuilder sb = new StringBuilder();
|
for(CellObject co : data){
|
sb.append(co.getSource().getText().trim());
|
}
|
String value = sb.toString();
|
value = TextUtils.removeKeys(value, IdCardConfig.VALUE_SEX);
|
value = TextUtils.removeKeys(value, IdCardConfig.VALUE_NATION);
|
if(value.indexOf(IdCardConfig.SEX_MALE) >= 0){
|
result[0] = IdCardConfig.SEX_MALE;
|
} else if(value.indexOf(IdCardConfig.SEX_FEMALE) >= 0){
|
result[0] = IdCardConfig.SEX_FEMALE;
|
} else {
|
logger.warn("未解析到性别:" + value);
|
result[0] = TextUtils.EMPTY_VALUE;
|
}
|
value = TextUtils.removeKeys(value, "男女");
|
result[1] = value;
|
return result;
|
}
|
|
private String parseAddress(List<CellObject> data){
|
// 把"住址"这个关键词对象去掉
|
String value = null;
|
CellObject temp = null;
|
for(Iterator<CellObject> it = data.iterator(); it.hasNext();){
|
temp = it.next();
|
value = TextUtils.removeKeys(temp.getSource().getText(), IdCardConfig.VALUE_ADDRESS);
|
if(value != null && value.equals(TextUtils.EMPTY_VALUE)){
|
logger.debug("移除'住址'单元格:" + value);
|
it.remove();
|
break;
|
}
|
}
|
logger.debug("住址单元格数量:" + data.size());
|
value = "";
|
for(CellObject co : data){
|
value += co.getSource().getText();
|
}
|
// if(data.size() == 3){
|
// value = data.get(1).getSource().getText() + data.get(2).getSource().getText();
|
// } else if(data.size() == 2){
|
// value = data.get(0).getSource().getText() + data.get(1).getSource().getText();
|
// } else if(data.size() == 1){
|
// value = data.get(0).getSource().getText();
|
// } else {
|
// logger.error("身份证地址信息没数据");
|
// return null;
|
// }
|
// return TextUtils.removeKeys(value, key);
|
return value;
|
}
|
|
private String parseOneField(List<CellObject> data, String key, int rowNum){
|
String value = null;
|
// if(!isAddress){
|
if(data.size() == 2){
|
// 存在两个元素,
|
if(data.get(0).getSource().getText().equals(key)){
|
value = data.get(1).getSource().getText();
|
logger.debug("匹配到关键字段:" + key + ", value=" + value);
|
return value;
|
}
|
value = data.get(1).getSource().getText();
|
return value;
|
|
} else if(data.size() == 1){
|
// 只有一个元素,需要依据关键词截取
|
value = data.get(0).getSource().getText();
|
// String[] keys = key.split(""); // 拆分为每个字替换为空
|
// for(String k : keys){
|
// value.replaceFirst(k, "");
|
// }
|
return TextUtils.removeKeys(value, key);
|
} else {
|
logger.error("行中没有任何数据,row=" + rowNum);
|
return null;
|
}
|
// }
|
}
|
|
@Override
|
public void sortCellObjectList(){
|
CellObject target = null;
|
int currentRow = 1;
|
for(Iterator<CellObject> it = this.otherCellObjectList.iterator(); it.hasNext();){
|
target = it.next();
|
List<CellObject> rowSet = null;
|
boolean sameRow = false;
|
|
// 如果rowCache中没有,则直接放入第一个单元格,行号最后在统一排列
|
if(rowCache.size() == 0){
|
// rowSet = new ArrayList<>(4);
|
// rowSet.add(target);
|
// this.rowCache.put(currentRow, rowSet);
|
this.createNewRow(target, currentRow);
|
currentRow ++;
|
continue;
|
}
|
|
// 检测已存在行数据中,是否有相同行的单元格。如果没有则要新创建行
|
|
for(Map.Entry<Integer, List<CellObject>> entry : this.rowCache.entrySet()){
|
rowSet = entry.getValue();
|
sameRow = TableObjectUtils.isInSameRow(rowSet.get(0), target, idCardConfig.getCellTolerance());
|
if(sameRow){
|
logger.debug("找到匹配行:" + entry.getKey() + ", target=" + target.getSource().getText());
|
rowSet.add(target);
|
// 从other集合移除该单元格
|
it.remove();
|
// 退出该循环,从下一个目标元素继续执行
|
break;
|
}
|
}
|
|
// 如果现有行数据中不存在,则创建新行
|
if(!sameRow){
|
this.createNewRow(target, currentRow);
|
currentRow ++;
|
continue;
|
}
|
}
|
|
// 每行对列排序
|
for(List<CellObject> list : this.rowCache.values()){
|
TableObjectUtils.sortColumnCellList(list);
|
}
|
}
|
|
@Override
|
public void addTextBlock(TextBlock textBlock){
|
CellObject cellObject = new CellObject();
|
cellObject.setSource(textBlock);
|
cellObject.setId(this.generateCellId());
|
this.otherCellObjectList.add(cellObject);
|
}
|
|
/**
|
* 识别正面还是反面
|
*/
|
private void recognizeCardSide(){
|
// boolean success = false;
|
List<String> idcardFrontKeys = this.idCardConfig.getIdCardFrontKeys();
|
for(List<CellObject> list : this.rowCache.values()){
|
for(CellObject co : list){
|
for(String key : idcardFrontKeys){
|
if(co.getSource().getText().indexOf(key) >= 0){
|
logger.debug("找到身份证(正面)关键词:" + key);
|
this.cardSide = 0;
|
// success = true;
|
break;
|
}
|
}
|
}
|
}
|
}
|
|
private void createNewRow(CellObject target, int rowNumber){
|
List<CellObject> rowSet = new ArrayList<>(4);
|
rowSet.add(target);
|
this.rowCache.put(rowNumber, rowSet);
|
}
|
|
private String generateCellId(){
|
return String.valueOf(System.nanoTime());
|
}
|
|
@Override
|
public String toString(){
|
return new StringBuilder("[name=").append(this.name)
|
.append(", sex=").append(this.sex)
|
.append(", nation=").append(this.nation)
|
.append(", birthday=").append(this.birthday)
|
.append(", address=").append(this.address)
|
.append(", no=").append(this.cardNo)
|
.append(", side=").append(this.cardSide)
|
.append("]").toString();
|
}
|
}
|