package com.walker.openocr.table;
|
|
import com.walker.openocr.AbstractTextResolver;
|
import com.walker.openocr.OcrType;
|
import com.walker.openocr.TextBlock;
|
import com.walker.openocr.util.TableObjectUtils;
|
|
import java.util.ArrayList;
|
import java.util.List;
|
|
public class TableTextResolver extends AbstractTextResolver<TableObject, TableConfig> {
|
|
public TableTextResolver(){
|
this.setOcrType(OcrType.TextTable);
|
}
|
|
@Override
|
protected TableObject doResolveGeneric(List<TextBlock> dataList, List<TableConfig> configList) {
|
throw new UnsupportedOperationException("不支持该方法");
|
}
|
|
@Override
|
protected TableObject doResolveIdCard(List<TextBlock> dataList, List<TableConfig> configList) {
|
throw new UnsupportedOperationException("不支持该方法");
|
}
|
|
@Override
|
protected TableObject doResolveTable(List<TextBlock> dataList, List<TableConfig> configList) {
|
if(configList == null || configList.size() == 0){
|
logger.error("table configList 未设置,无法解析表格对象");
|
return null;
|
}
|
|
TableConfig tableConfig = this.getTableType(dataList, configList);
|
if(tableConfig == null){
|
// logger.error("未找到:tableConfig,无法继续解析表格");
|
// return null;
|
logger.error("未找到:tableConfig,尝试使用最后一个作为兜底模板配置");
|
tableConfig = configList.get(configList.size()-1);
|
}
|
|
// 要返回的表格对象。
|
TableObject tableObject = new TableObject(tableConfig);
|
|
// List<TextBlock> availableList = this.filterRemovedNames(tableConfig, dataList);
|
// if(availableList != null){
|
// for(TextBlock tb : availableList){
|
// tableObject.addTextBlock(tb);
|
// }
|
// }
|
// tableObject.sortCellObjectList();
|
// tableObject.calculateValue();
|
// tableObject.printRowCache();
|
this.flowTableObject(tableObject, tableConfig, dataList);
|
|
return tableObject;
|
}
|
|
/**
|
* 把处理流程组合到一起调用计算并填充表格结果对象。
|
* @param tableObject
|
* @param tableConfig
|
* @param dataList
|
* @date 2022-09-30
|
*/
|
protected void flowTableObject(TableObject tableObject, TableConfig tableConfig, List<TextBlock> dataList){
|
List<TextBlock> availableList = this.filterRemovedNames(tableConfig, dataList);
|
if(availableList != null){
|
for(TextBlock tb : availableList){
|
// logger.debug(tb.toString());
|
tableObject.addTextBlock(tb);
|
}
|
}
|
tableObject.sortCellObjectList();
|
tableObject.calculateValue();
|
tableObject.printRowCache();
|
}
|
|
private List<TextBlock> filterRemovedNames(TableConfig tableConfig, List<TextBlock> dataList){
|
String[] removeNames = tableConfig.getRemoveColumnsName();
|
if(removeNames == null || removeNames.length == 0){
|
logger.info("tableConfig:未配置移除关键词");
|
return dataList;
|
}
|
|
List<TextBlock> availableList = new ArrayList<>();
|
for(TextBlock textBlock : dataList){
|
boolean exist = false;
|
for(String name : removeNames){
|
if(textBlock.getText().equals(name)){
|
exist = true;
|
break;
|
}
|
}
|
if(!exist){
|
availableList.add(textBlock);
|
} else {
|
logger.debug("移除数据: " + textBlock.getText());
|
}
|
}
|
return availableList;
|
}
|
|
private TableConfig getTableType(List<TextBlock> dataList, List<TableConfig> configList){
|
List<String[]> tableKeysList = null;
|
for(TableConfig tableConfig : configList){
|
// 2023-11-01 先检查是否存在不应该包含的:标题关键词
|
if(tableConfig.getTableTitleNotKey().size() > 0){
|
// boolean notKeyMatch = false;
|
// for(TextBlock textBlock : dataList){
|
// if(tableConfig.getTableTitleNotKey().contains(textBlock.getText())){
|
// notKeyMatch = true;
|
// break;
|
// }
|
// }
|
// if(notKeyMatch){
|
// continue;
|
// }
|
if(TableObjectUtils.containTableKeyNot(dataList, tableConfig.getTableTitleNotKey())){
|
logger.debug("继续下一个,标题包含了排除关键词:{}", tableConfig.getTableTitleNotKey());
|
continue;
|
}
|
}
|
|
tableKeysList = tableConfig.getTableTypeKeys();
|
if(tableKeysList == null || tableKeysList.size() == 0){
|
throw new IllegalArgumentException("TableConfig中未配置:tableTypeKeys属性, table=" + tableConfig.getTableTypeKeys());
|
}
|
if(tableKeysList.size() > 1){
|
throw new UnsupportedOperationException("暂时没实现多组关键词并列代码,请使用单组,如:大地,保险,机动车");
|
}
|
int successSize = 0;
|
for(String s : tableKeysList.get(0)){
|
for(TextBlock textBlock : dataList){
|
if(textBlock.getText().contains(s)){
|
successSize ++;
|
break;
|
}
|
}
|
}
|
if(successSize > 0 && successSize == tableKeysList.get(0).length){
|
logger.info("搜索到表格类型:" + tableConfig);
|
return tableConfig;
|
}
|
}
|
return null;
|
}
|
}
|