/// <summary>
/// Two identical single-range classes must collapse to a one-element alphabet,
/// with both inputs mapping back onto that same range.
/// </summary>
public void CanDecomposeDupes()
{
    var first = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };
    var second = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { first, second });

    var expected = new CharacterClassElement('b', 'y');
    Assert.AreEqual(1, result.Alphabet.Count);
    Assert.AreEqual(expected, result.Alphabet.First());
    Assert.AreEqual(expected, result.Mapping[first].Elements.First());
    Assert.AreEqual(expected, result.Mapping[second].Elements.First());
}
/// <summary>
/// Walks the regex AST rooted at <paramref name="rootNode"/>, collects every
/// character class referenced by a <see cref="CharacterClassMatchNode"/>,
/// normalises those classes into disjoint ranges, and rewrites each match node
/// in place to reference its normalised class.
/// </summary>
/// <param name="rootNode">Root of the regex AST; its match nodes are mutated.</param>
/// <returns>The disjoint alphabet produced by the normalisation.</returns>
public static ISet<CharacterClassElement> NormaliseAST(RegexNode rootNode)
{
    // Walk the tree and find all char classes, remembering which match node
    // owns each class so we can rewrite the node after normalisation.
    var matchClassLink = new Dictionary<CharacterClass, CharacterClassMatchNode>();
    Action<RegexNode> classFinder = null;
    classFinder = (rxNode) =>
    {
        var curAltNode = rxNode.FirstAlternative;
        while (curAltNode != null)
        {
            var curMatchNode = curAltNode.FirstFactor;
            while (curMatchNode != null)
            {
                if (curMatchNode is GroupMatchNode)
                {
                    // A group wraps a nested sub-regex; recurse into its body.
                    var g = (GroupMatchNode) curMatchNode;
                    classFinder(g.Body);
                }
                else if (curMatchNode is CharacterClassMatchNode)
                {
                    var cn = (CharacterClassMatchNode) curMatchNode;
                    matchClassLink[cn.MatchingCharacterClass] = cn;
                }
                curMatchNode = curMatchNode.Next;
            }
            curAltNode = curAltNode.Next;
        }
    };
    classFinder(rootNode);

    // Normalise the collected classes, then point every match node at its
    // normalised replacement.
    // (Removed: a dead [min-max] "allClass" local that was built but never
    // passed to the normaliser — the code registering it was commented out.)
    var rStruct = NormaliseCharacterClasses(matchClassLink.Keys.ToList());
    foreach (var cClass in matchClassLink.Keys)
    {
        matchClassLink[cClass].MatchingCharacterClass = rStruct.Mapping[cClass];
    }
    return rStruct.Alphabet;
}
/// <summary>
/// Normalising a universal [min-max] class alongside a two-range class must
/// split the full character range around the smaller class's boundaries.
/// </summary>
public void DecompositionWithAnyClassTest()
{
    var anyClass = new CharacterClass { Elements = { new CharacterClassElement(char.MinValue, char.MaxValue) } };
    var twoRanges = new CharacterClass
    {
        Elements =
        {
            new CharacterClassElement('b', 'k'),
            new CharacterClassElement('w', 'w')
        }
    };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { anyClass, twoRanges });

    // Full range split around 'b'-'k' and 'w'.
    Assert.AreEqual(5, result.Alphabet.Count);
    foreach (var piece in new[]
    {
        new CharacterClassElement(char.MinValue, 'a'),
        new CharacterClassElement('b', 'k'),
        new CharacterClassElement('l', 'v'),
        new CharacterClassElement('w', 'w'),
        new CharacterClassElement('x', char.MaxValue)
    })
    {
        CollectionAssert.Contains(result.Alphabet, piece);
    }

    // The two-range class maps onto exactly its own (already disjoint) pieces.
    Assert.AreEqual(2, result.Mapping[twoRanges].Elements.Count);
    CollectionAssert.Contains(result.Mapping[twoRanges].Elements, new CharacterClassElement('b', 'k'));
    CollectionAssert.Contains(result.Mapping[twoRanges].Elements, new CharacterClassElement('w', 'w'));

    // The universal class maps onto the entire alphabet.
    CollectionAssert.AreEquivalent(result.Alphabet, result.Mapping[anyClass].Elements);
}
/// <summary>
/// Two overlapping single-range classes split into disjoint pieces; the wider
/// class maps onto both pieces, the narrower class onto just its own piece.
/// </summary>
public void CanDecomposePartialDupes()
{
    var wide = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };
    var narrow = new CharacterClass { Elements = { new CharacterClassElement('b', 'v') } };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { wide, narrow });

    var lowerPiece = new CharacterClassElement('b', 'v');
    var upperPiece = new CharacterClassElement('w', 'y');
    CollectionAssert.AreEquivalent(new[] { lowerPiece, upperPiece }, result.Alphabet);
    CollectionAssert.AreEquivalent(new[] { lowerPiece, upperPiece }, result.Mapping[wide].Elements);
    CollectionAssert.AreEquivalent(new[] { lowerPiece }, result.Mapping[narrow].Elements);
}
/// <summary>
/// Parses either a bracketed (possibly negated) character class or a single
/// literal character, returning both as a <see cref="CharacterClass"/>.
/// </summary>
/// <exception cref="ParseException">The current token cannot begin a class.</exception>
private CharacterClass ParseCharClass()
{
    var result = new CharacterClass();
    var lookahead = CurToken.Type;

    if (lookahead == TokenType.OPENCLASS || lookahead == TokenType.OPENCLASSNEGATE)
    {
        // Bracketed class: [...] or [^...]
        result.IsNegated = ParseCharClassOpening();
        result.Elements.UnionWith(ParseCharClassBody());
        AssertAndAdvance(TokenType.CLOSECLASS);
        return result;
    }

    if (lookahead == TokenType.CHAR)
    {
        // A bare literal becomes a degenerate single-character range.
        var literal = CurCharToken.Character;
        result.Elements.Add(new CharacterClassElement(literal, literal));
        AdvanceInput();
        return result;
    }

    throw new ParseException("Invalid input lookahead");
}
/// <summary>
/// Parses one match factor: a character class or literal, a (possibly
/// non-capturing) group, or the '.' any-character wildcard.
/// </summary>
/// <exception cref="ParseException">The current token cannot begin a factor.</exception>
private MatchFactorNode ParseMatchFactor()
{
    var lookahead = CurToken.Type;

    if (lookahead == TokenType.OPENCLASS || lookahead == TokenType.OPENCLASSNEGATE || lookahead == TokenType.CHAR)
    {
        // Bracketed classes and bare characters both parse to a class node.
        return new CharacterClassMatchNode(ParseCharClass());
    }

    if (lookahead == TokenType.OPENGROUP || lookahead == TokenType.OPENGROUPNOCAP)
    {
        var shouldCapture = ParseGroupOpening();
        var groupNode = new GroupMatchNode(capturing: shouldCapture);
        groupNode.Body = ParseRegexBody();
        AssertAndAdvance(TokenType.CLOSEGROUP);
        return groupNode;
    }

    if (lookahead == TokenType.ANY)
    {
        // '.' is modelled as a class spanning the entire character range.
        var anyClass = new CharacterClass();
        anyClass.Elements.Add(new CharacterClassElement(char.MinValue, char.MaxValue));
        AdvanceInput();
        return new CharacterClassMatchNode(anyClass);
    }

    throw new ParseException("Invalid input lookahead");
}
/// <summary>
/// Creates a match node for the given character class, forwarding the unary
/// operator and next-factor link to the base factor node.
/// </summary>
/// <param name="cClass">The character class this node matches against.</param>
/// <param name="op">Optional unary operator applied to this factor.</param>
/// <param name="next">Optional next factor in the concatenation chain.</param>
public CharacterClassMatchNode(CharacterClass cClass, UnaryOperatorType op = UnaryOperatorType.None, MatchFactorNode next = null) : base(op, next) { MatchingCharacterClass = cClass; }
/// <summary>
/// A negated class must be rewritten as the non-negated complement of its
/// ranges with respect to the fully decomposed alphabet.
/// </summary>
public void NegatedRangesDecompositionTest()
{
    var negated = new CharacterClass { IsNegated = true, Elements = { new CharacterClassElement('b', 'q') } };
    var all = new CharacterClass { Elements = { new CharacterClassElement(char.MinValue, char.MaxValue) } };
    var middle = new CharacterClass { Elements = { new CharacterClassElement('k', 'm') } };
    var end = new CharacterClass { Elements = { new CharacterClassElement('t', 'y') } };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { negated, all, middle, end });

    Assert.AreEqual(7, result.Alphabet.Count);
    foreach (var piece in new[]
    {
        new CharacterClassElement(char.MinValue, 'a'),
        new CharacterClassElement('b', 'j'),
        new CharacterClassElement('k', 'm'),
        new CharacterClassElement('n', 'q'),
        new CharacterClassElement('r', 's'),
        new CharacterClassElement('t', 'y'),
        new CharacterClassElement('z', char.MaxValue)
    })
    {
        CollectionAssert.Contains(result.Alphabet, piece);
    }

    // The universal class maps onto the whole alphabet.
    CollectionAssert.AreEquivalent(result.Alphabet, result.Mapping[all].Elements);

    // [^b-q] becomes a plain class holding everything outside 'b'-'q'.
    Assert.IsFalse(result.Mapping[negated].IsNegated);
    CollectionAssert.AreEquivalent(new[]
    {
        new CharacterClassElement(char.MinValue, 'a'),
        new CharacterClassElement('r', 's'),
        new CharacterClassElement('t', 'y'),
        new CharacterClassElement('z', char.MaxValue)
    }, result.Mapping[negated].Elements);

    CollectionAssert.AreEquivalent(new[] { new CharacterClassElement('k', 'm') }, result.Mapping[middle].Elements);
}
/// <summary>
/// Two multi-range classes with overlaps decompose into six disjoint pieces,
/// and each input class maps onto exactly the pieces its ranges cover.
/// </summary>
public void SingleDecompositionTest()
{
    var classA = new CharacterClass
    {
        Elements =
        {
            new CharacterClassElement('a', 'k'),
            new CharacterClassElement('v', 'v')
        }
    };
    var classB = new CharacterClass
    {
        Elements =
        {
            new CharacterClassElement('l', 'z'),
            new CharacterClassElement('b', 'b')
        }
    };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { classA, classB });

    Assert.AreEqual(6, result.Alphabet.Count);
    foreach (var piece in new[]
    {
        new CharacterClassElement('a', 'a'),
        new CharacterClassElement('b', 'b'),
        new CharacterClassElement('c', 'k'),
        new CharacterClassElement('l', 'u'),
        new CharacterClassElement('v', 'v'),
        new CharacterClassElement('w', 'z')
    })
    {
        CollectionAssert.Contains(result.Alphabet, piece);
    }

    Assert.AreEqual(4, result.Mapping[classA].Elements.Count);
    foreach (var piece in new[]
    {
        new CharacterClassElement('a', 'a'),
        new CharacterClassElement('b', 'b'),
        new CharacterClassElement('c', 'k'),
        new CharacterClassElement('v', 'v')
    })
    {
        CollectionAssert.Contains(result.Mapping[classA].Elements, piece);
    }

    Assert.AreEqual(4, result.Mapping[classB].Elements.Count);
    foreach (var piece in new[]
    {
        new CharacterClassElement('l', 'u'),
        new CharacterClassElement('v', 'v'),
        new CharacterClassElement('w', 'z'),
        new CharacterClassElement('b', 'b')
    })
    {
        CollectionAssert.Contains(result.Mapping[classB].Elements, piece);
    }
}
/// <summary>
/// An enclosing range plus two overlapping sub-ranges decompose into five
/// disjoint pieces; the enclosing class maps onto all of them.
/// </summary>
public void SimpleDecompositionTest()
{
    var outer = new CharacterClass { Elements = { new CharacterClassElement('a', 'z') } };
    var inner = new CharacterClass
    {
        Elements =
        {
            new CharacterClassElement('f', 'q'),
            new CharacterClassElement('j', 's')
        }
    };

    var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { outer, inner });

    var pieces = new[]
    {
        new CharacterClassElement('a', 'e'),
        new CharacterClassElement('f', 'i'),
        new CharacterClassElement('j', 'q'),
        new CharacterClassElement('r', 's'),
        new CharacterClassElement('t', 'z')
    };

    Assert.AreEqual(5, result.Alphabet.Count);
    foreach (var piece in pieces)
    {
        CollectionAssert.Contains(result.Alphabet, piece);
    }

    // 'a'-'z' spans every piece of the decomposed alphabet.
    Assert.AreEqual(5, result.Mapping[outer].Elements.Count);
    foreach (var piece in pieces)
    {
        CollectionAssert.Contains(result.Mapping[outer].Elements, piece);
    }

    // The overlapping 'f'-'q' / 'j'-'s' ranges reduce to three disjoint pieces.
    Assert.AreEqual(3, result.Mapping[inner].Elements.Count);
    CollectionAssert.Contains(result.Mapping[inner].Elements, new CharacterClassElement('f', 'i'));
    CollectionAssert.Contains(result.Mapping[inner].Elements, new CharacterClassElement('j', 'q'));
    CollectionAssert.Contains(result.Mapping[inner].Elements, new CharacterClassElement('r', 's'));
}
/// <summary>
/// Links <paramref name="src"/> to <paramref name="dst"/> once per range
/// element of <paramref name="cond"/>, delegating to the per-element overload.
/// </summary>
private void LinkStates(NFAState src, NFAState dst, CharacterClass cond)
{
    foreach (var range in cond.Elements)
    {
        LinkStates(src, dst, range);
    }
}
/// <summary>
/// Decomposes the ranges of <paramref name="classes"/> into a set of pairwise
/// disjoint ranges (the alphabet) via a sweep over sorted range boundaries,
/// and builds, for each input class, a replacement class expressed purely in
/// those disjoint ranges. Negated inputs are rewritten as non-negated
/// complements against the alphabet.
/// </summary>
/// <param name="classes">The character classes to normalise.</param>
/// <returns>
/// A result holding the disjoint alphabet and a mapping from each original
/// class to its normalised (never negated) equivalent.
/// </returns>
public static NormaliseCharacterClassResult NormaliseCharacterClasses(IList<CharacterClass> classes) {
    var rStruct = new NormaliseCharacterClassResult() {
        Alphabet = new HashSet<CharacterClassElement>(),
        Mapping = new Dictionary<CharacterClass, CharacterClass>()
    };
    //Need to create the list of start/end class elements: each range element
    //contributes one Start and one End boundary token.
    var rangeTokenList = classes.SelectMany(cl => cl.Elements)
        .SelectMany(el => new CharacterRangeToken[] {
            new CharacterRangeToken() { Value = el.Start, SourceElement = el, Type = TokenType.Start },
            new CharacterRangeToken() { Value = el.End, SourceElement = el, Type = TokenType.End }
        })
        .OrderBy(tk => tk.Value) //stable sort, start tokens always before end tokens
                                 //(OrderBy is stable and each element emits Start before End)
        .ToList();
    //Now break up the ranges by sweeping the sorted boundary tokens.
    //Mapping of old char class els into new components:
    //activeRanges holds elements whose Start has been seen but not yet their End;
    //inactiveRanges receives them (with their accumulated pieces) once closed.
    var activeRanges = new Dictionary<CharacterClassElement, ISet<CharacterClassElement>>();
    var inactiveRanges = new Dictionary<CharacterClassElement, ISet<CharacterClassElement>>();
    //Sentinel "previous boundary" so the first real token has something to pair with.
    var previousToken = new CharacterRangeToken() { Value = char.MinValue, Type = TokenType.Start };
    var first = true;
    foreach (var token in rangeTokenList) {
        //make our new class spanning the gap since the previous boundary
        var newRange = new CharacterClassElement(previousToken.Value, token.Value);
        //work out [), (], etc — trim an endpoint whenever the neighbouring piece
        //already owns that boundary character, so pieces never overlap
        if (previousToken.Type == TokenType.Start && token.Type == TokenType.Start) {
            newRange = new CharacterClassElement(previousToken.Value, (char)(token.Value - 1));
        } else if (previousToken.Type == TokenType.End && token.Type == TokenType.End) {
            newRange = new CharacterClassElement((char)(previousToken.Value + 1), token.Value);
        } else if (previousToken.Type == TokenType.End && token.Type == TokenType.Start) {
            newRange = new CharacterClassElement((char)(previousToken.Value + 1), (char)(token.Value - 1));
        }
        //Make sure there's actually a worthwhile range here (trimming can empty it)
        //Also, skip first so we don't put the \0-token range in
        //If they have an any char class, this will get added by the logic anyway
        if (newRange.End >= newRange.Start && !first) {
            rStruct.Alphabet.Add(newRange);
            //Now attach it to anything that wants it
            //Skipped on first iter
            foreach (var ccel in activeRanges.Keys)
                activeRanges[ccel].Add(newRange);
        }
        //Now add new source classes (Start) or retire finished ones (End)
        switch (token.Type) {
            case TokenType.Start:
                if (!activeRanges.ContainsKey(token.SourceElement))
                    activeRanges.Add(token.SourceElement, new HashSet<CharacterClassElement>());
                break;
            case TokenType.End:
                if (activeRanges.ContainsKey(token.SourceElement)) {
                    inactiveRanges[token.SourceElement] = activeRanges[token.SourceElement];
                    activeRanges.Remove(token.SourceElement);
                }
                break;
        }
        previousToken = token;
        first = false;
    }
    //fix cclass mappings: rebuild each class from the disjoint pieces its elements cover
    foreach (var cClass in classes) {
        rStruct.Mapping[cClass] = new CharacterClass() { IsNegated = false };
        foreach (var newRange in cClass.Elements.SelectMany(el => inactiveRanges[el]))
            rStruct.Mapping[cClass].Elements.Add(newRange);
        //If negated, swap to a non-negated class holding the alphabet complement
        if (cClass.IsNegated) {
            var newCClass = new CharacterClass();
            newCClass.Elements.UnionWith(
                rStruct.Alphabet.Except(rStruct.Mapping[cClass].Elements));
            rStruct.Mapping[cClass] = newCClass;
        }
    }
    return rStruct;
}