package com.walker.openocr.table; import com.walker.openocr.AbstractTextResolver; import com.walker.openocr.OcrType; import com.walker.openocr.TextBlock; import com.walker.openocr.util.TableObjectUtils; import java.util.ArrayList; import java.util.List; public class TableTextResolver extends AbstractTextResolver { public TableTextResolver(){ this.setOcrType(OcrType.TextTable); } @Override protected TableObject doResolveGeneric(List dataList, List configList) { throw new UnsupportedOperationException("不支持该方法"); } @Override protected TableObject doResolveIdCard(List dataList, List configList) { throw new UnsupportedOperationException("不支持该方法"); } @Override protected TableObject doResolveTable(List dataList, List configList) { if(configList == null || configList.size() == 0){ logger.error("table configList 未设置,无法解析表格对象"); return null; } TableConfig tableConfig = this.getTableType(dataList, configList); if(tableConfig == null){ // logger.error("未找到:tableConfig,无法继续解析表格"); // return null; logger.error("未找到:tableConfig,尝试使用最后一个作为兜底模板配置"); tableConfig = configList.get(configList.size()-1); } // 要返回的表格对象。 TableObject tableObject = new TableObject(tableConfig); // List availableList = this.filterRemovedNames(tableConfig, dataList); // if(availableList != null){ // for(TextBlock tb : availableList){ // tableObject.addTextBlock(tb); // } // } // tableObject.sortCellObjectList(); // tableObject.calculateValue(); // tableObject.printRowCache(); this.flowTableObject(tableObject, tableConfig, dataList); return tableObject; } /** * 把处理流程组合到一起调用计算并填充表格结果对象。 * @param tableObject * @param tableConfig * @param dataList * @date 2022-09-30 */ protected void flowTableObject(TableObject tableObject, TableConfig tableConfig, List dataList){ List availableList = this.filterRemovedNames(tableConfig, dataList); if(availableList != null){ for(TextBlock tb : availableList){ // logger.debug(tb.toString()); tableObject.addTextBlock(tb); } } tableObject.sortCellObjectList(); tableObject.calculateValue(); tableObject.printRowCache(); } private List filterRemovedNames(TableConfig tableConfig, List dataList){ String[] removeNames = tableConfig.getRemoveColumnsName(); if(removeNames == null || removeNames.length == 0){ logger.info("tableConfig:未配置移除关键词"); return dataList; } List availableList = new ArrayList<>(); for(TextBlock textBlock : dataList){ boolean exist = false; for(String name : removeNames){ if(textBlock.getText().equals(name)){ exist = true; break; } } if(!exist){ availableList.add(textBlock); } else { logger.debug("移除数据: " + textBlock.getText()); } } return availableList; } private TableConfig getTableType(List dataList, List configList){ List tableKeysList = null; for(TableConfig tableConfig : configList){ // 2023-11-01 先检查是否存在不应该包含的:标题关键词 if(tableConfig.getTableTitleNotKey().size() > 0){ // boolean notKeyMatch = false; // for(TextBlock textBlock : dataList){ // if(tableConfig.getTableTitleNotKey().contains(textBlock.getText())){ // notKeyMatch = true; // break; // } // } // if(notKeyMatch){ // continue; // } if(TableObjectUtils.containTableKeyNot(dataList, tableConfig.getTableTitleNotKey())){ logger.debug("继续下一个,标题包含了排除关键词:{}", tableConfig.getTableTitleNotKey()); continue; } } tableKeysList = tableConfig.getTableTypeKeys(); if(tableKeysList == null || tableKeysList.size() == 0){ throw new IllegalArgumentException("TableConfig中未配置:tableTypeKeys属性, table=" + tableConfig.getTableTypeKeys()); } if(tableKeysList.size() > 1){ throw new UnsupportedOperationException("暂时没实现多组关键词并列代码,请使用单组,如:大地,保险,机动车"); } int successSize = 0; for(String s : tableKeysList.get(0)){ for(TextBlock textBlock : dataList){ if(textBlock.getText().contains(s)){ successSize ++; break; } } } if(successSize > 0 && successSize == tableKeysList.get(0).length){ logger.info("搜索到表格类型:" + tableConfig); return tableConfig; } } return null; } }