package com.walker.web.agent.impl; import java.util.BitSet; import java.util.concurrent.atomic.AtomicInteger; public class SearchableString { private static final int[] EMPTY = new int[0]; private static final int[][] SINGLE_VALUES = getSingleValues(); private final char[] myChars; private final int[][] myIndices; private final Cache myPrefixCache = new Cache(); private final Cache myPostfixCache = new Cache(); // Reusable buffer for findIndices private final int[] myBuffer; /** * Creates a new instance for the specified string value. * @param stringValue The user agent string * @param maxIndex The number of unique literals */ SearchableString(final String stringValue, final int maxIndex) { myChars = stringValue.toCharArray(); myIndices = new int[maxIndex][]; myBuffer = new int[myChars.length]; } /** * Returns the size of this instance. * @return The size */ int getSize() { return myChars.length; } /** * Indicates whether this instance starts with the specified prefix. * @param literal The prefix that should be tested * @return true if the argument represents the prefix of this instance, false otherwise. */ boolean startsWith(final Literal literal) { // Check whether the answer is already in the cache final int index = literal.getIndex(); final Boolean cached = myPrefixCache.get(index); if (cached != null) { return cached.booleanValue(); } // Get the answer and cache the result final boolean result = literal.matches(myChars, 0); myPrefixCache.set(index, result); return result; } /** * Indicates whether this instance ends with the specified postfix. * @param literal The postfix that should be tested * @return true if the argument represents the postfix of this instance, false otherwise. */ boolean endsWith(final Literal literal) { // Check whether the answer is already in the cache final int index = literal.getIndex(); final Boolean cached = myPostfixCache.get(index); if (cached != null) { return cached.booleanValue(); } // Get the answer and cache the result final boolean result = literal.matches(myChars, myChars.length - literal.getLength()); myPostfixCache.set(index, result); return result; } /** * Returns all indices where the literal argument can be found in this String. Results are cached for better * performance. * @param literal The string that should be found * @return all indices where the literal argument can be found in this String. */ int[] getIndices(final Literal literal) { // Check whether the answer is already in the cache final int index = literal.getIndex(); final int[] cached = myIndices[index]; if (cached != null) { return cached; } // Find all indices final int[] values = findIndices(literal); myIndices[index] = values; return values; } /** * Returns all indices where the literal argument can be found in this String. * @param literal The string that should be found * @return all indices where the literal argument can be found in this String. */ private int[] findIndices(final Literal literal) { int count = 0; final char s = literal.getFirstChar(); for (int i = 0; i < myChars.length; i++) { // Check the first char for better performance and check the complete string if ((myChars[i] == s || s == '?') && literal.matches(myChars, i)) { // This index matches myBuffer[count] = i; count++; } } // Check whether any match has been found if (count == 0) { return EMPTY; } // Use an existing array if (count == 1 && myBuffer[0] < SINGLE_VALUES.length) { final int index = myBuffer[0]; return SINGLE_VALUES[index]; } // Copy the values final int[] values = new int[count]; for (int i = 0; i < count; i++) { values[i] = myBuffer[i]; } return values; } /** * {@inheritDoc} */ @Override public String toString() { return new String(myChars); } private static final int[][] getSingleValues() { final int[][] result = new int[1024][]; for (int i = 0; i < result.length; i++) { result[i] = new int[]{i}; } return result; } /** Compact cache for boolean values. */ static class Cache { // The boolean values private final BitSet myValues = new BitSet(); // Indicates whether a value has been stored for an index private final BitSet myIsKnown = new BitSet(); /** * Gets the cached value for the specified index. * @param index The index of the requested value * @return The cached boolean value, or null if no value is present in the cache. */ Boolean get(final int index) { // Check whether a true value has been set if (myValues.get(index)) { return Boolean.TRUE; } // Check whether any value has been stored if (myIsKnown.get(index)) { return Boolean.FALSE; } // No value found return null; } /** * Set the value in the cache. * @param index The index for the stored value * @param flag The actual value */ void set(final int index, final boolean flag) { // Store the value myValues.set(index, flag); // Store the fact the value has been stored myIsKnown.set(index, true); } } } /** * This combines a String value with a unique int value. The int value is used for caching of results. */ class Literal { // The actual string data private final char[] myCharacters; // The unique index for this instance private final int myIndex; /** * Creates a new instance with the specified non-empty value. * @param value The String value * @param index The unique index for this instance */ Literal(final String value, final int index) { myCharacters = value.toCharArray(); myIndex = index; } /** * Returns the first character for quick checks. * @return the first character */ char getFirstChar() { return myCharacters[0]; } /** * Returns the size of this instance. * @return The size of this instance */ int getLength() { return myCharacters.length; } /** * Checks whether the value represents a complete substring from the from index. * @param from The start index of the potential substring * @return true If the arguments represent a valid substring, false otherwise. */ boolean matches(final char[] value, final int from) { // Check the bounds final int len = myCharacters.length; if (len + from > value.length || from < 0) { return false; } // Bounds are ok, check all characters. // Allow question marks to match any character for (int i = 0; i < len; i++) { if (myCharacters[i] != value[i + from] && myCharacters[i] != '?') { return false; } } // All characters match return true; } /** * Returns the unique index of this instance. * @return the unique index of this instance. */ final int getIndex() { return myIndex; } private static boolean contains(final char[] characters, final char value) { for (final char c : characters) { if (c == value) { return true; } } return false; } boolean requires(final String value) { final int len = value.length(); if (len == 1) { return contains(myCharacters, value.charAt(0)); } if (len > myCharacters.length) { return false; } return toString().contains(value); } /** * {@inheritDoc} */ @Override public String toString() { return new String(myCharacters); } } class LiteralDomain { // Keep track of the total number of instances private final AtomicInteger myNrOfInstances = new AtomicInteger(); Literal createLiteral(final String contents) { return new Literal(contents, myNrOfInstances.getAndAdd(1)); } SearchableString getSearchableString(final String contents) { final int maxIndex = myNrOfInstances.get() + 1; return new SearchableString(contents, maxIndex); } }