/// <summary>
/// Two classes that each hold the identical range [b-y] must normalise to a
/// single alphabet entry, and both classes must map onto that entry.
/// </summary>
public void CanDecomposeDupes()
        {
            var expectedRange = new CharacterClassElement('b', 'y');

            var firstClass = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };
            var secondClass = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };

            var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { firstClass, secondClass });

            // Exactly one piece in the alphabet, and it is the shared range.
            Assert.AreEqual(1, result.Alphabet.Count);
            Assert.AreEqual(expectedRange, result.Alphabet.First());

            // Each source class maps onto that same piece.
            Assert.AreEqual(expectedRange, result.Mapping[firstClass].Elements.First());
            Assert.AreEqual(expectedRange, result.Mapping[secondClass].Elements.First());
        }
Пример #2
0
        /// <summary>
        /// Walks the regex tree rooted at <paramref name="rootNode"/>, normalises every
        /// character class found on a <see cref="CharacterClassMatchNode"/> via
        /// <c>NormaliseCharacterClasses</c>, and rewrites each node to point at its
        /// normalised class.
        /// </summary>
        /// <param name="rootNode">Root of the tree to walk; group bodies are visited recursively.</param>
        /// <returns>The alphabet produced by <c>NormaliseCharacterClasses</c> for the classes found.</returns>
        public static ISet<CharacterClassElement> NormaliseAST(RegexNode rootNode)
        {
            // Every character-class node, in discovery order. A list (rather than a
            // dictionary keyed by class) is used so that nodes sharing one class are
            // all remapped instead of only the last one seen.
            var classNodes = new List<CharacterClassMatchNode>();

            Action<RegexNode> collectClassNodes = null;
            collectClassNodes = rxNode =>
            {
                for (var alternative = rxNode.FirstAlternative; alternative != null; alternative = alternative.Next)
                {
                    for (var factor = alternative.FirstFactor; factor != null; factor = factor.Next)
                    {
                        var groupNode = factor as GroupMatchNode;
                        if (groupNode != null)
                        {
                            // Descend into the group before continuing with its siblings.
                            collectClassNodes(groupNode.Body);
                            continue;
                        }

                        var classNode = factor as CharacterClassMatchNode;
                        if (classNode != null)
                        {
                            classNodes.Add(classNode);
                        }
                    }
                }
            };

            collectClassNodes(rootNode);

            // NOTE(review): the original code prepared a hidden [min-max] class "to make
            // sure the alphabet is total" but never passed it on, so the alphabet only
            // covers ranges that occur in the expression and negated classes are
            // complemented against that partial alphabet. Behaviour is kept as-is here;
            // confirm whether the total alphabet was meant to be re-enabled.

            // Each distinct class is normalised once, in first-seen order.
            var distinctClasses = classNodes
                .Select(node => node.MatchingCharacterClass)
                .Distinct()
                .ToList();
            var rStruct = NormaliseCharacterClasses(distinctClasses);

            // Point every node at the normalised replacement of its class.
            foreach (var node in classNodes)
            {
                node.MatchingCharacterClass = rStruct.Mapping[node.MatchingCharacterClass];
            }

            return rStruct.Alphabet;
        }
        /// <summary>
        /// A full-range class combined with a class holding [b-k] and [w-w] must split
        /// the whole character range into five pieces; the full-range class maps onto
        /// all of them and the second class only onto its own two ranges.
        /// </summary>
        public void DecompositionWithAnyClassTest()
        {
            var anyClass = new CharacterClass
            {
                Elements = { new CharacterClassElement(char.MinValue, char.MaxValue) }
            };
            var sparseClass = new CharacterClass
            {
                Elements =
                {
                    new CharacterClassElement('b', 'k'),
                    new CharacterClassElement('w', 'w')
                }
            };

            var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { anyClass, sparseClass });

            // The alphabet is the full range cut at the borders of the sparse class.
            var expectedAlphabet = new[]
            {
                new CharacterClassElement(char.MinValue, 'a'),
                new CharacterClassElement('b', 'k'),
                new CharacterClassElement('l', 'v'),
                new CharacterClassElement('w', 'w'),
                new CharacterClassElement('x', char.MaxValue)
            };
            Assert.AreEqual(5, result.Alphabet.Count);
            foreach (var expectedPiece in expectedAlphabet)
            {
                CollectionAssert.Contains(result.Alphabet, expectedPiece);
            }

            // The sparse class keeps exactly its two original ranges.
            var sparseMapped = result.Mapping[sparseClass].Elements;
            Assert.AreEqual(2, sparseMapped.Count);
            CollectionAssert.Contains(sparseMapped, new CharacterClassElement('b', 'k'));
            CollectionAssert.Contains(sparseMapped, new CharacterClassElement('w', 'w'));

            // The full-range class covers every piece of the alphabet.
            CollectionAssert.AreEquivalent(result.Alphabet, result.Mapping[anyClass].Elements);
        }
        /// <summary>
        /// [b-y] and [b-v] share a start point: the result must be the pieces [b-v]
        /// and [w-y], with the wider class mapping to both and the narrower to the first.
        /// </summary>
        public void CanDecomposePartialDupes()
        {
            var wideClass = new CharacterClass { Elements = { new CharacterClassElement('b', 'y') } };
            var narrowClass = new CharacterClass { Elements = { new CharacterClassElement('b', 'v') } };

            var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { wideClass, narrowClass });

            var bothPieces = new[]
            {
                new CharacterClassElement('b', 'v'),
                new CharacterClassElement('w', 'y')
            };
            var sharedPieceOnly = new[]
            {
                new CharacterClassElement('b', 'v'),
            };

            CollectionAssert.AreEquivalent(bothPieces, result.Alphabet);
            CollectionAssert.AreEquivalent(bothPieces, result.Mapping[wideClass].Elements);
            CollectionAssert.AreEquivalent(sharedPieceOnly, result.Mapping[narrowClass].Elements);
        }
Пример #5
0
 /// <summary>
 /// Parses one character class at the current token: either a class opened by an
 /// OPENCLASS / OPENCLASSNEGATE token and closed by CLOSECLASS, or a single CHAR
 /// token, which becomes a one-character range.
 /// </summary>
 /// <returns>The parsed class; IsNegated is set from ParseCharClassOpening for opened classes.</returns>
 /// <exception cref="ParseException">The current token cannot start a character class.</exception>
 private CharacterClass ParseCharClass()
 {
     var parsed = new CharacterClass();
     var lookahead = CurToken.Type;

     if (lookahead == TokenType.OPENCLASS || lookahead == TokenType.OPENCLASSNEGATE)
     {
         parsed.IsNegated = ParseCharClassOpening();
         parsed.Elements.UnionWith(ParseCharClassBody());
         AssertAndAdvance(TokenType.CLOSECLASS);
         return parsed;
     }

     if (lookahead == TokenType.CHAR)
     {
         // A lone character is stored as the range [c-c].
         var literal = CurCharToken.Character;
         parsed.Elements.Add(new CharacterClassElement(literal, literal));
         AdvanceInput();
         return parsed;
     }

     throw new ParseException("Invalid input lookahead");
 }
Пример #6
0
 /// <summary>
 /// Parses one match factor at the current token: a character class (or single
 /// character), a group closed by CLOSEGROUP, or an ANY token, which becomes a
 /// class spanning char.MinValue to char.MaxValue.
 /// </summary>
 /// <returns>A CharacterClassMatchNode or GroupMatchNode for the parsed factor.</returns>
 /// <exception cref="ParseException">The current token cannot start a match factor.</exception>
 private MatchFactorNode ParseMatchFactor()
 {
     var lookahead = CurToken.Type;

     if (lookahead == TokenType.OPENCLASS
         || lookahead == TokenType.OPENCLASSNEGATE
         || lookahead == TokenType.CHAR)
     {
         return new CharacterClassMatchNode(ParseCharClass());
     }

     if (lookahead == TokenType.OPENGROUP || lookahead == TokenType.OPENGROUPNOCAP)
     {
         // The opening decides whether the group captures; the body follows it.
         var captures = ParseGroupOpening();
         var groupNode = new GroupMatchNode(capturing: captures)
         {
             Body = ParseRegexBody()
         };
         AssertAndAdvance(TokenType.CLOSEGROUP);
         return groupNode;
     }

     if (lookahead == TokenType.ANY)
     {
         var wildcard = new CharacterClass();
         wildcard.Elements.Add(new CharacterClassElement(char.MinValue, char.MaxValue));
         AdvanceInput();
         return new CharacterClassMatchNode(wildcard);
     }

     throw new ParseException("Invalid input lookahead");
 }
Пример #7
0
 /// <summary>
 /// Creates a match node for the given character class.
 /// </summary>
 /// <param name="cClass">Class stored in MatchingCharacterClass.</param>
 /// <param name="op">Unary operator forwarded to the base constructor; defaults to None.</param>
 /// <param name="next">Following factor forwarded to the base constructor; defaults to null.</param>
 public CharacterClassMatchNode(
     CharacterClass cClass,
     UnaryOperatorType op = UnaryOperatorType.None,
     MatchFactorNode next = null)
     : base(op, next)
 {
     this.MatchingCharacterClass = cClass;
 }
        /// <summary>
        /// A negated [b-q] class together with a full-range class, [k-m] and [t-y]
        /// must split the character range into seven pieces. The negated class must
        /// come back non-negated, holding every piece outside [b-q].
        /// </summary>
        public void NegatedRangesDecompositionTest()
        {
            var negatedClass = new CharacterClass
            {
                IsNegated = true,
                Elements = { new CharacterClassElement('b', 'q') }
            };
            var anyClass = new CharacterClass
            {
                Elements = { new CharacterClassElement(char.MinValue, char.MaxValue) }
            };
            var middleClass = new CharacterClass { Elements = { new CharacterClassElement('k', 'm') } };
            var endClass = new CharacterClass { Elements = { new CharacterClassElement('t', 'y') } };

            var result = CharacterClassMapper.NormaliseCharacterClasses(
                new[] { negatedClass, anyClass, middleClass, endClass });

            var expectedAlphabet = new[]
            {
                new CharacterClassElement(char.MinValue, 'a'),
                new CharacterClassElement('b', 'j'),
                new CharacterClassElement('k', 'm'),
                new CharacterClassElement('n', 'q'),
                new CharacterClassElement('r', 's'),
                new CharacterClassElement('t', 'y'),
                new CharacterClassElement('z', char.MaxValue)
            };
            Assert.AreEqual(7, result.Alphabet.Count);
            foreach (var expectedPiece in expectedAlphabet)
            {
                CollectionAssert.Contains(result.Alphabet, expectedPiece);
            }

            // The full-range class covers the whole alphabet.
            CollectionAssert.AreEquivalent(result.Alphabet, result.Mapping[anyClass].Elements);

            // The negated class is replaced by a plain class listing its complement.
            var outsideNegatedRange = new[]
            {
                new CharacterClassElement(char.MinValue, 'a'),
                new CharacterClassElement('r', 's'),
                new CharacterClassElement('t', 'y'),
                new CharacterClassElement('z', char.MaxValue)
            };
            Assert.IsFalse(result.Mapping[negatedClass].IsNegated);
            CollectionAssert.AreEquivalent(outsideNegatedRange, result.Mapping[negatedClass].Elements);

            var middleOnly = new[]
            {
                new CharacterClassElement('k', 'm')
            };
            CollectionAssert.AreEquivalent(middleOnly, result.Mapping[middleClass].Elements);
        }
        /// <summary>
        /// Each class holds one wide range plus a single character that falls inside
        /// the other class's wide range; the single characters must be cut out as
        /// their own pieces and appear in both mappings.
        /// </summary>
        public void SingleDecompositionTest()
        {
            var lowClass = new CharacterClass
            {
                Elements =
                {
                    new CharacterClassElement('a', 'k'),
                    new CharacterClassElement('v', 'v')
                }
            };
            var highClass = new CharacterClass
            {
                Elements =
                {
                    new CharacterClassElement('l', 'z'),
                    new CharacterClassElement('b', 'b')
                }
            };

            var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { lowClass, highClass });

            var expectedAlphabet = new[]
            {
                new CharacterClassElement('a', 'a'),
                new CharacterClassElement('b', 'b'),
                new CharacterClassElement('c', 'k'),
                new CharacterClassElement('l', 'u'),
                new CharacterClassElement('v', 'v'),
                new CharacterClassElement('w', 'z')
            };
            Assert.AreEqual(6, result.Alphabet.Count);
            foreach (var expectedPiece in expectedAlphabet)
            {
                CollectionAssert.Contains(result.Alphabet, expectedPiece);
            }

            var expectedLow = new[]
            {
                new CharacterClassElement('a', 'a'),
                new CharacterClassElement('b', 'b'),
                new CharacterClassElement('c', 'k'),
                new CharacterClassElement('v', 'v')
            };
            var lowMapped = result.Mapping[lowClass].Elements;
            Assert.AreEqual(4, lowMapped.Count);
            foreach (var expectedPiece in expectedLow)
            {
                CollectionAssert.Contains(lowMapped, expectedPiece);
            }

            var expectedHigh = new[]
            {
                new CharacterClassElement('l', 'u'),
                new CharacterClassElement('v', 'v'),
                new CharacterClassElement('w', 'z'),
                new CharacterClassElement('b', 'b')
            };
            var highMapped = result.Mapping[highClass].Elements;
            Assert.AreEqual(4, highMapped.Count);
            foreach (var expectedPiece in expectedHigh)
            {
                CollectionAssert.Contains(highMapped, expectedPiece);
            }
        }
        /// <summary>
        /// [a-z] against a class with the overlapping ranges [f-q] and [j-s] must
        /// yield five pieces; the first class maps to all five and the second to the
        /// three pieces between 'f' and 's'.
        /// </summary>
        public void SimpleDecompositionTest()
        {
            var wholeClass = new CharacterClass { Elements = { new CharacterClassElement('a', 'z') } };
            var overlappingClass = new CharacterClass
            {
                Elements =
                {
                    new CharacterClassElement('f', 'q'),
                    new CharacterClassElement('j', 's')
                }
            };

            var result = CharacterClassMapper.NormaliseCharacterClasses(new[] { wholeClass, overlappingClass });

            var allPieces = new[]
            {
                new CharacterClassElement('a', 'e'),
                new CharacterClassElement('f', 'i'),
                new CharacterClassElement('j', 'q'),
                new CharacterClassElement('r', 's'),
                new CharacterClassElement('t', 'z')
            };

            Assert.AreEqual(5, result.Alphabet.Count);
            foreach (var expectedPiece in allPieces)
            {
                CollectionAssert.Contains(result.Alphabet, expectedPiece);
            }

            var wholeMapped = result.Mapping[wholeClass].Elements;
            Assert.AreEqual(5, wholeMapped.Count);
            foreach (var expectedPiece in allPieces)
            {
                CollectionAssert.Contains(wholeMapped, expectedPiece);
            }

            var innerPieces = new[]
            {
                new CharacterClassElement('f', 'i'),
                new CharacterClassElement('j', 'q'),
                new CharacterClassElement('r', 's')
            };
            var overlappingMapped = result.Mapping[overlappingClass].Elements;
            Assert.AreEqual(3, overlappingMapped.Count);
            foreach (var expectedPiece in innerPieces)
            {
                CollectionAssert.Contains(overlappingMapped, expectedPiece);
            }
        }
Пример #11
0
 /// <summary>
 /// Links <paramref name="src"/> to <paramref name="dst"/> once for every element
 /// of <paramref name="cond"/>, delegating each element to the LinkStates call
 /// that takes a single element (defined outside this snippet).
 /// </summary>
 private void LinkStates(NFAState src, NFAState dst, CharacterClass cond)
 {
     foreach (var range in cond.Elements)
     {
         LinkStates(src, dst, range);
     }
 }
Пример #12
0
        /// <summary>
        /// Splits the ranges of the given character classes into non-overlapping pieces
        /// (the alphabet) and maps every input class to a non-negated class made of
        /// those pieces.
        /// </summary>
        /// <param name="classes">Classes whose elements are to be decomposed.</param>
        /// <returns>
        /// A result whose Alphabet holds every piece produced and whose Mapping gives,
        /// for each input class, the replacement class. A negated input is replaced by
        /// the alphabet pieces it does not cover, so its complement is relative to the
        /// ranges present in <paramref name="classes"/>.
        /// </returns>
        public static NormaliseCharacterClassResult NormaliseCharacterClasses(IList<CharacterClass> classes)
        {
            var rStruct = new NormaliseCharacterClassResult()
            {
                Alphabet = new HashSet<CharacterClassElement>(),
                Mapping = new Dictionary<CharacterClass, CharacterClass>()
            };

            // One Start and one End token per element, swept in character order.
            // At equal characters every Start must precede every End, otherwise a
            // range that begins on the last character of another range would share
            // that character with it instead of getting a separate one-character piece.
            var rangeTokenList = classes.SelectMany(cl => cl.Elements)
                .SelectMany(el => new CharacterRangeToken[]
                {
                    new CharacterRangeToken()
                    {
                        Value = el.Start,
                        SourceElement = el,
                        Type = TokenType.Start
                    },
                    new CharacterRangeToken()
                    {
                        Value = el.End,
                        SourceElement = el,
                        Type = TokenType.End
                    }
                })
                .OrderBy(tk => tk.Value)
                .ThenBy(tk => tk.Type == TokenType.End ? 1 : 0)
                .ToList();

            // Elements currently open in the sweep, and elements already closed, each
            // with the pieces collected for them.
            var activeRanges = new Dictionary<CharacterClassElement, ISet<CharacterClassElement>>();
            var inactiveRanges = new Dictionary<CharacterClassElement, ISet<CharacterClassElement>>();

            var previousToken = new CharacterRangeToken()
            {
                Value = char.MinValue,
                Type = TokenType.Start
            };
            var first = true;

            foreach (var token in rangeTokenList)
            {
                // The piece between two tokens is inclusive of a Start on the left and
                // an End on the right, exclusive otherwise. Bounds are computed as int
                // so that +1 / -1 at char.MaxValue / char.MinValue cannot wrap around
                // and masquerade as a valid range.
                int lower = previousToken.Type == TokenType.End
                    ? previousToken.Value + 1
                    : previousToken.Value;
                int upper = token.Type == TokenType.Start
                    ? token.Value - 1
                    : token.Value;

                // Skip empty pieces, and skip the first token so the stretch from \0
                // up to the first range is not added on its own.
                if (!first && lower <= upper)
                {
                    var newRange = new CharacterClassElement((char)lower, (char)upper);
                    rStruct.Alphabet.Add(newRange);

                    // Every element that is open right now contains this piece.
                    foreach (var pieces in activeRanges.Values)
                    {
                        pieces.Add(newRange);
                    }
                }

                // Open or close the element this token belongs to.
                if (token.Type == TokenType.Start)
                {
                    if (!activeRanges.ContainsKey(token.SourceElement))
                    {
                        activeRanges.Add(token.SourceElement, new HashSet<CharacterClassElement>());
                    }
                }
                else if (token.Type == TokenType.End)
                {
                    ISet<CharacterClassElement> finishedPieces;
                    if (activeRanges.TryGetValue(token.SourceElement, out finishedPieces))
                    {
                        inactiveRanges[token.SourceElement] = finishedPieces;
                        activeRanges.Remove(token.SourceElement);
                    }
                }

                previousToken = token;
                first = false;
            }

            // Build the replacement for every input class from its elements' pieces.
            foreach (var cClass in classes)
            {
                var mapped = new CharacterClass()
                {
                    IsNegated = false
                };
                foreach (var el in cClass.Elements)
                {
                    mapped.Elements.UnionWith(inactiveRanges[el]);
                }

                // A negated class becomes a plain class holding the rest of the alphabet.
                if (cClass.IsNegated)
                {
                    var complement = new CharacterClass();
                    complement.Elements.UnionWith(rStruct.Alphabet.Except(mapped.Elements));
                    mapped = complement;
                }

                rStruct.Mapping[cClass] = mapped;
            }

            return rStruct;
        }