package com.walker.web.agent.impl;

import com.univocity.parsers.common.record.Record;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import com.walker.web.agent.BrowsCapField;
import com.walker.web.agent.Capabilities;
import com.walker.web.agent.ParseException;
import com.walker.web.agent.UserAgentParser;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import static com.walker.web.agent.Capabilities.UNKNOWN_BROWSCAP_VALUE;
import static java.util.Collections.singleton;

/**
 * Reads a CSV stream of user-agent rules and turns it into a {@link UserAgentParser}.
 *
 * <p>While reading, equal strings, literals and capability sets are de-duplicated through
 * instance-level caches so that every rule shares the same objects. The caches are plain
 * {@link HashMap}s, so an instance is meant to be used for a single parse from one thread.
 */
public class UserAgentFileParser {

    /** The wildcard token used in rule patterns. */
    private static final String WILDCARD = "*";

    /** Matches one or more consecutive wildcards, so they can be collapsed into a single one. */
    private static final Pattern REPEATED_WILDCARDS = Pattern.compile("\\*+");

    /**
     * Records with this many columns or fewer are skipped instead of being turned into a rule.
     * NOTE(review): presumably this filters header/version rows and matches the highest
     * {@link BrowsCapField} column index - confirm against the file format.
     */
    private static final int MAX_SKIPPED_COLUMN_COUNT = 47;

    // Mapping substrings to unique literal for caching of lookups
    private final Map<String, Literal> myUniqueLiterals = new HashMap<>();

    // Canonical instance for every distinct set of capabilities
    private final Map<Capabilities, Capabilities> myCache = new HashMap<>();

    // Canonical instance for every distinct field value
    private final Map<String, String> myStrings = new HashMap<>();

    private final Mapper myMapper;
    private final Set<BrowsCapField> myFields;
    private final LiteralDomain myDomain = new LiteralDomain();

    /**
     * Creates a file parser that stores the given fields.
     *
     * @param fields The fields that should be stored during parsing; the collection is copied
     */
    UserAgentFileParser(final Collection<BrowsCapField> fields) {
        myFields = new HashSet<>(fields);
        myMapper = new Mapper(myFields);
    }

    /**
     * Parses a csv stream of rules.
     *
     * @param input The input stream
     * @param fields The fields that should be stored during parsing
     * @return a UserAgentParser based on the read rules
     * @throws IOException If reading the stream failed.
     * @throws ParseException If a record has no pattern or its pattern cannot be turned into a rule
     */
    public static UserAgentParser parse(final Reader input, final Collection<BrowsCapField> fields)
            throws IOException, ParseException {
        return new UserAgentFileParser(fields).parse(input);
    }

    /**
     * Reads all records from the input and converts every usable record into a rule.
     *
     * @param input The csv input
     * @return a parser backed by the rules that were read, in file order
     * @throws ParseException If a record has no pattern or its pattern cannot be turned into a rule
     */
    private UserAgentParser parse(final Reader input) throws ParseException {
        final List<Rule> rules = new ArrayList<>();
        final CsvParser csvParser = new CsvParser(new CsvParserSettings());

        csvParser.beginParsing(input);
        boolean completed = false;
        try {
            Record record;
            while ((record = csvParser.parseNextRecord()) != null) {
                final Rule rule = getRule(record);
                if (rule != null) {
                    rules.add(rule);
                }
            }
            completed = true;
        } finally {
            // When a rule fails half-way through the stream the parser is still open;
            // stop it explicitly so its resources are released.
            if (!completed) {
                csvParser.stopParsing();
            }
        }

        return new UserAgentParserImpl(rules.toArray(new Rule[0]), myDomain, getDefaultCapabilities());
    }

    /**
     * Creates the capabilities in which every stored field has the unknown value.
     *
     * @return the (cached) default capabilities
     */
    Capabilities getDefaultCapabilities() {
        final Map<BrowsCapField, String> result = new EnumMap<>(BrowsCapField.class);
        for (final BrowsCapField field : myFields) {
            result.put(field, UNKNOWN_BROWSCAP_VALUE);
        }
        return getCapabilities(result);
    }

    /**
     * Converts a single csv record into a rule.
     *
     * @param record The record to convert
     * @return the rule, or {@code null} when the record has too few columns to be a rule
     * @throws ParseException If the record has no pattern, or the pattern cannot be converted
     *         into a rule that reproduces the same pattern
     */
    private Rule getRule(final Record record) throws ParseException {
        if (record.getValues().length <= MAX_SKIPPED_COLUMN_COUNT) {
            return null;
        }

        final String rawPattern = record.getString(0);
        if (rawPattern == null) {
            // Without this check the normalization below would fail with a NullPointerException.
            throw new ParseException("Unable to parse a rule without a pattern");
        }

        // Normalize: lowercase and remove duplicate wildcards
        final String pattern = normalizePattern(rawPattern);

        try {
            final Map<BrowsCapField, String> values = getBrowsCapFields(record);
            final Capabilities capabilities = getCapabilities(values);
            final Rule rule = createRule(pattern, capabilities);

            // Check reconstructing the pattern
            if (!pattern.equals(rule.getPattern())) {
                throw new ParseException("Unable to parse " + pattern);
            }
            return rule;
        } catch (final IllegalStateException e) {
            throw new ParseException("Unable to parse " + pattern);
        }
    }

    /**
     * Lower-cases a pattern and collapses runs of wildcards into a single wildcard.
     *
     * @param pattern The pattern as found in the file, not {@code null}
     * @return the normalized pattern
     */
    private static String normalizePattern(final String pattern) {
        // Locale.ROOT keeps the result independent of the JVM's default locale
        // (a Turkish default locale would otherwise map 'I' to a dotless 'i').
        final String lowerCase = pattern.toLowerCase(Locale.ROOT);
        if (lowerCase.contains("**")) {
            return REPEATED_WILDCARDS.matcher(lowerCase).replaceAll(WILDCARD);
        }
        return lowerCase;
    }

    /**
     * Reads the value of every stored field from the record.
     *
     * @param record The record to read from
     * @return the canonical value per stored field
     */
    private Map<BrowsCapField, String> getBrowsCapFields(final Record record) {
        final Map<BrowsCapField, String> values = new EnumMap<>(BrowsCapField.class);
        for (final BrowsCapField field : myFields) {
            values.put(field, getValue(record.getString(field.getIndex())));
        }
        return values;
    }

    /**
     * Returns the canonical instance of a field value.
     *
     * @param value The raw value, may be {@code null}
     * @return the unknown value for {@code null} or blank input, otherwise the trimmed value;
     *         equal values always yield the same String instance
     */
    String getValue(final String value) {
        if (value == null) {
            return UNKNOWN_BROWSCAP_VALUE;
        }

        final String trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return UNKNOWN_BROWSCAP_VALUE;
        }

        final String cached = myStrings.putIfAbsent(trimmed, trimmed);
        return cached != null ? cached : trimmed;
    }

    /**
     * Returns the canonical capabilities for the given field values.
     *
     * @param values The value per field
     * @return a capabilities instance that is shared between all equal sets of values
     */
    Capabilities getCapabilities(final Map<BrowsCapField, String> values) {
        final CapabilitiesImpl result = new CapabilitiesImpl(myMapper.getValues(values), myMapper);
        final Capabilities fromCache = myCache.putIfAbsent(result, result);
        return fromCache != null ? fromCache : result;
    }

    /**
     * Returns the unique literal for a pattern part, creating it in the domain on first use.
     *
     * @param value The literal text
     * @return the literal shared by all rules that contain this text
     */
    Literal getLiteral(final String value) {
        return myUniqueLiterals.computeIfAbsent(value, myDomain::createLiteral);
    }

    /**
     * @return the domain in which all literals of this parser are created
     */
    LiteralDomain getDomain() {
        return myDomain;
    }

    /**
     * Creates a rule for a normalized pattern.
     *
     * @param pattern The normalized pattern
     * @param capabilities The capabilities that belong to the pattern
     * @return the rule; for the pattern {@code "*"} this is the match-all rule, which carries
     *         its own capabilities instead of the given ones
     * @throws IllegalStateException If the pattern is empty
     */
    Rule createRule(final String pattern, final Capabilities capabilities) {
        final List<String> parts = getParts(pattern);
        if (parts.isEmpty()) {
            throw new IllegalStateException();
        }

        final String first = parts.get(0);
        if (parts.size() == 1) {
            if (WILDCARD.equals(first)) {
                return getWildCardRule();
            }
            return new Rule(getLiteral(first), null, null, pattern, capabilities);
        }

        // Literals are requested in the order prefix, postfix, middle parts; the order is kept
        // because it determines the order in which new literals are created in the domain.
        int from = 0;
        int to = parts.size();

        Literal prefix = null;
        if (!WILDCARD.equals(first)) {
            prefix = getLiteral(first);
            from++;
        }

        final String last = parts.get(parts.size() - 1);
        Literal postfix = null;
        if (!WILDCARD.equals(last)) {
            postfix = getLiteral(last);
            to--;
        }

        // What remains between prefix and postfix, without the wildcards separating the parts.
        final List<String> suffixes = new ArrayList<>(parts.subList(from, to));
        suffixes.removeAll(singleton(WILDCARD));

        final Literal[] suffixArray = new Literal[suffixes.size()];
        for (int i = 0; i < suffixArray.length; i++) {
            suffixArray[i] = getLiteral(suffixes.get(i));
        }

        return new Rule(prefix, suffixArray, postfix, pattern, capabilities);
    }

    /**
     * Creates the rule for the match-all pattern {@code "*"}.
     *
     * @return a rule without literals whose capabilities hold the unknown value for every
     *         stored field that is not a default field
     */
    private Rule getWildCardRule() {
        // The default match all pattern
        final Map<BrowsCapField, String> fieldValues = new EnumMap<>(BrowsCapField.class);
        for (final BrowsCapField field : myFields) {
            if (!field.isDefault()) {
                fieldValues.put(field, UNKNOWN_BROWSCAP_VALUE);
            }
        }
        final Capabilities capabilities = getCapabilities(fieldValues);
        return new Rule(null, new Literal[0], null, WILDCARD, capabilities);
    }

    /**
     * Splits a pattern into its literal parts and wildcards, in order of appearance.
     *
     * <p>For example {@code "a*bc*"} yields {@code ["a", "*", "bc", "*"]}. Every wildcard
     * character becomes its own part; an empty pattern yields an empty list.
     *
     * @param pattern The pattern to split
     * @return the parts of the pattern
     */
    static List<String> getParts(final String pattern) {
        final List<String> parts = new ArrayList<>();
        final StringBuilder literal = new StringBuilder();

        for (int i = 0; i < pattern.length(); i++) {
            final char c = pattern.charAt(i);
            if (c == '*') {
                // A wildcard ends the literal collected so far
                if (literal.length() != 0) {
                    parts.add(literal.toString());
                    literal.setLength(0);
                }
                parts.add(WILDCARD);
            } else {
                literal.append(c);
            }
        }

        // Trailing literal after the last wildcard
        if (literal.length() != 0) {
            parts.add(literal.toString());
        }
        return parts;
    }
}