/// <summary>
/// Re-orders the parsed atoms so that, within any run of adjacent wildcard and ignore-character atoms,
/// every ignore character ends up in front of every wildcard. The original author's note states that
/// leaving a wildcard in front of an ignore character produces a false positive when the wildcard is
/// evaluated in the CompareAt method.
/// </summary>
/// <remarks>
/// Swapping an adjacent wildcard / ignore-character pair does not change what the pattern matches
/// ("%_" and "_%" both mean "at least one character"). After each swap the previous pair is re-checked,
/// so an ignore character bubbles past every wildcard that precedes it (for example "%%_" becomes "_%%"
/// rather than stopping at "%_%").
/// </remarks>
private void NormaliseWildcards()
{
    int atomIndex = 0;

    while (atomIndex < (_atomsLength - 1))
    {
        bool wildcardBeforeIgnore =
            (_atoms[atomIndex].AtomType == LikeExpressionType.WildCard) &&
            (_atoms[atomIndex + 1].AtomType == LikeExpressionType.IgnoreCharacter);

        if (!wildcardBeforeIgnore)
        {
            atomIndex++;
            continue;
        }

        // Move the ignore character in front of the wildcard.
        LikeAtom wildcardAtom = _atoms[atomIndex];
        _atoms[atomIndex] = _atoms[atomIndex + 1];
        _atoms[atomIndex + 1] = wildcardAtom;

        // The ignore character now sits at atomIndex. If there is an atom before it, step back so that
        // a preceding wildcard is swapped as well; otherwise carry on forwards. Every swap removes one
        // "wildcard before ignore character" inversion, so the loop always terminates.
        if (atomIndex > 0)
        {
            atomIndex--;
        }
        else
        {
            atomIndex++;
        }
    }
}
/// <summary>
/// Parses the match expression into a sequence of LikeAtom instances, which are used when comparing
/// candidate strings to establish a match. The parsing overhead is potentially "high", but the intent
/// is that the same like instance is re-used for comparing numerous strings.
/// </summary>
/// <remarks>
/// Recognised syntax outside a character set: the escape character (only when escaping is enabled)
/// makes the next character a literal, '_' is an ignore-character atom, '%' is a wildcard atom, '['
/// opens a character set, and anything else is a literal single character.
/// Inside a character set: a leading '^' negates the set, 'a-z' adds a range (the bounds are swapped
/// if given in descending order), a leading or trailing '-' is a literal hyphen, and ']' closes the
/// set. The escape character is not interpreted inside a set.
/// A set that is never closed, and an escape character at the very end of the expression, produce
/// no atom.
/// </remarks>
private void ParseIntoAtoms()
{
    bool escaping = false;
    bool charSet = false;
    bool range = false;
    bool notInSet = false;

    int matchExpressionLength = _matchExpression.Length;

    // Each loop iteration emits at most one atom, so the expression length is a safe upper bound.
    _atoms = new LikeAtom[matchExpressionLength];
    _atomsLength = 0;

    var ranges = new ArrayList();

    for (int matchExpCharIndex = 0; matchExpCharIndex < matchExpressionLength; matchExpCharIndex++)
    {
        char currentChar = _matchExpression[matchExpCharIndex];

        if (charSet)
        {
            // character set specification
            if ((currentChar == '^') && (ranges.Count == 0))
            {
                if (!notInSet)
                {
                    // the first '^' directly after '[' negates the set
                    notInSet = true;
                }
                else
                {
                    // a second leading '^' is a literal member of the set
                    ranges.Add(new[] { currentChar, currentChar });
                }
            }
            else if (currentChar == ']')
            {
                // if "-" was the last character in the sequence, then it wasn't a range of characters...
                if (range)
                {
                    ranges.Add(new[] { '-', '-' });
                }

                char[] matchCharacters;
                char[][] matchRanges;
                SimplifyRanges(ranges, out matchCharacters, out matchRanges);

                LikeExpressionType setType = notInSet
                    ? LikeExpressionType.NotInCharacterSet
                    : LikeExpressionType.CharacterSet;

                _atoms[_atomsLength++] = new LikeAtom(setType, matchCharacters, matchRanges);

                // reset the set-parsing state ready for the next '['
                charSet = false;
                range = false;
                notInSet = false;
            }
            else if (currentChar == '-')
            {
                if (range)
                {
                    // a second '-' while a range is already pending is a literal hyphen
                    ranges.Add(new[] { '-', '-' });
                }
                else if (ranges.Count > 0)
                {
                    // there is a preceding member, so this starts a range
                    range = true;
                }
                else
                {
                    // nothing precedes it, so a leading '-' cannot start a range: keep it as a
                    // literal hyphen instead of silently dropping it (e.g. "[-acdf]").
                    ranges.Add(new[] { '-', '-' });
                }
            }
            else if (range)
            {
                // the range start is the character immediately before the '-'
                char cStart = _matchExpression[matchExpCharIndex - 2];
                char cEnd = currentChar;

                if (cStart > cEnd)
                {
                    // bounds were given in descending order, swap them round
                    char tempStart = cStart;
                    cStart = cEnd;
                    cEnd = tempStart;
                }

                ranges.Add(new[] { cStart, cEnd });

                // toggle range parsing off again
                range = false;
            }
            else
            {
                // plain member, stored as a single-character range
                ranges.Add(new[] { currentChar, currentChar });
            }
        }
        else if (escaping)
        {
            // regardless of the character, we escape its contents
            _atoms[_atomsLength++] = new LikeAtom(LikeExpressionType.SingleCharacter, currentChar);

            // and turn off escaping, to allow the correct parsing of the rest of the string.
            escaping = false;
        }
        else if (_applyEscapeCharacter && (currentChar == _escapeCharacter))
        {
            // escape the next character
            escaping = true;
        }
        else if (currentChar == '_')
        {
            _atoms[_atomsLength++] = new LikeAtom(LikeExpressionType.IgnoreCharacter);
        }
        else if (currentChar == '%')
        {
            _atoms[_atomsLength++] = new LikeAtom(LikeExpressionType.WildCard);
        }
        else if (currentChar == '[')
        {
            charSet = true;
            ranges.Clear();
        }
        else
        {
            _atoms[_atomsLength++] = new LikeAtom(LikeExpressionType.SingleCharacter, currentChar);
        }
    }
}