Ejemplo n.º 1
0
        /// <summary>
        /// Exercises <c>MatchPassword</c> on three passwords: one that is a single
        /// match in its entirety, one that contains two matches surrounded by other
        /// characters, and one that yields no match at all.
        /// </summary>
        public void SequenceMatcher()
        {
            var matcher = new SequenceMatcher();

            // "abcd": exactly one match spanning indices 0..3.
            var wholeWord = matcher.MatchPassword("abcd");
            Assert.AreEqual(1, wholeWord.Count());

            var only = wholeWord.First();
            Assert.AreEqual(0, only.Begin);
            Assert.AreEqual(3, only.End);
            Assert.AreEqual("abcd", only.Token);

            // Two matches embedded in other characters: "abcd" at 4..7 and "zyxw" at 11..14.
            var embedded = matcher.MatchPassword("asdfabcdhujzyxwhgjj");
            Assert.AreEqual(2, embedded.Count());

            var first = embedded.ElementAt(0);
            Assert.AreEqual(4, first.Begin);
            Assert.AreEqual(7, first.End);
            Assert.AreEqual("abcd", first.Token);

            var second = embedded.ElementAt(1);
            Assert.AreEqual(11, second.Begin);
            Assert.AreEqual(14, second.End);
            Assert.AreEqual("zyxw", second.Token);

            // A password for which no match is expected.
            var none = matcher.MatchPassword("dfsjkhfjksdh");
            Assert.AreEqual(0, none.Count());
        }
Ejemplo n.º 2
0
    /// <summary>
    /// Fills a ternary search tree with the decimal strings of 2^0 through 2^800,
    /// then reads a query count followed by that many lines from standard input.
    /// For each line it prints the total number of tree matches found when
    /// matching is started at every offset of the line.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        var power = new BigInteger(1);
        var powersOfTwo = new TernarySearchTree <char>();

        // 2^0 is added by hand; the loop adds 2^1 .. 2^800.
        powersOfTwo.Add("1");

        for (int exponent = 1; exponent <= 800; exponent++)
        {
            power *= 2;
            powersOfTwo.Add(power.ToString());
        }

        int queryCount = Convert.ToInt32(Console.ReadLine());

        for (int query = 0; query < queryCount; ++query)
        {
            string line = Console.ReadLine();
            int total = 0;

            // One matcher per starting offset; every match it yields is counted.
            for (int start = 0; start < line.Length; ++start)
            {
                total += powersOfTwo.Match(new SequenceMatcher(line, start)).Count();
            }

            Console.WriteLine(total);
        }
    }
        /// <summary>
        /// Runs every triggered pattern over the input sequence and returns a set of
        /// matches that do not overlap one another.
        /// </summary>
        /// <remarks>
        /// All matches of all triggered patterns are collected first, each tagged with
        /// the position of its pattern in the triggered collection. Where matches
        /// overlap, <paramref name="cmp"/> determines which of them is selected. The
        /// selected matches are sorted with
        /// <c>SequenceMatchResultConstants.OffsetComparator</c> before being returned.
        /// </remarks>
        /// <param name="elements">input sequence to match against</param>
        /// <param name="cmp">comparator indicating the order in which overlapping matches should be selected</param>
        /// <returns>list of match results that are non-overlapping</returns>
        /// <exception cref="RuntimeInterruptedException">the current thread was interrupted while patterns were being applied</exception>
        public virtual IList <ISequenceMatchResult <T> > FindNonOverlapping <_T0, _T1>(IList <_T0> elements, IComparator <_T1> cmp)
            where _T0 : T
        {
            ICollection <SequencePattern <T> > triggeredPatterns = GetTriggeredPatterns(elements);
            IList <ISequenceMatchResult <T> >  candidates        = new List <ISequenceMatchResult <T> >();
            int patternOrder = 0;

            foreach (SequencePattern <T> pattern in triggeredPatterns)
            {
                // Allow interrupting between patterns
                if (Thread.Interrupted())
                {
                    throw new RuntimeInterruptedException();
                }

                SequenceMatcher <T> matcher = pattern.GetMatcher(elements);
                matcher.SetMatchWithResult(matchWithResult);
                matcher.SetOrder(patternOrder++);

                // Snapshot each match, since the matcher itself moves on with every Find()
                while (matcher.Find())
                {
                    candidates.Add(matcher.ToBasicSequenceMatchResult());
                }
            }

            IList <ISequenceMatchResult <T> > selected = IntervalTree.GetNonOverlapping(candidates, SequenceMatchResultConstants.ToInterval, cmp);
            selected.Sort(SequenceMatchResultConstants.OffsetComparator);
            return(selected);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Verifies that <c>MatchPassword</c> yields no matches for "dfsjkhfjksdh".
        /// </summary>
        public void NoSequence()
        {
            var matches = new SequenceMatcher().MatchPassword("dfsjkhfjksdh").ToList();

            matches.Should().BeEmpty();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Splits both buffers on the '\n' byte, finds the blocks of segments the two
        /// sides have in common, and converts each of them to a <c>BDiffBlock</c>.
        /// </summary>
        /// <param name="source">Bytes of the original content.</param>
        /// <param name="destination">Bytes of the modified content.</param>
        /// <returns>
        /// One <c>BDiffBlock</c> per matching block, built from the source start index,
        /// source start + length, destination start index and destination start + length.
        /// </returns>
        public static BDiffBlock[] GetBlocks(byte[] source, byte[] destination)
        {
            var sourceSegments = source.Split((byte)'\n');
            var destinationSegments = destination.Split((byte)'\n');

            var matchingBlocks =
                new SequenceMatcher<Segment>(sourceSegments, destinationSegments, (left, right) => left.Equals(right))
                    .GetMatchingBlocks();

            return (from block in matchingBlocks
                    select new BDiffBlock(
                        block.SourceIndex,
                        block.SourceIndex + block.Length,
                        block.DestinationIndex,
                        block.DestinationIndex + block.Length)).ToArray();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Produces a binary patch describing how to turn <paramref name="source"/>
        /// into <paramref name="destination"/>.
        /// </summary>
        /// <remarks>
        /// The patch is a sequence of records written through a
        /// <c>BigEndianBinaryWriter</c>. Each record is three <c>uint</c> values
        /// (start offset in the source, end offset in the source, number of
        /// replacement bytes) followed by the replacement bytes themselves, taken
        /// from <paramref name="destination"/>. When the source is null or empty a
        /// single record (0, 0, destination length) carrying the whole destination
        /// is written.
        /// </remarks>
        /// <param name="source">Bytes of the original content; may be null or empty.</param>
        /// <param name="destination">Bytes of the modified content.</param>
        /// <returns>The serialized patch.</returns>
        public static byte[] Diff(byte[] source, byte[] destination)
        {
            var output = new MemoryStream();
            var writer = new BigEndianBinaryWriter(output);

            if(source == null || source.Length == 0)
            {
                // Nothing to compare against: the whole destination is one insertion at offset 0.
                writer.Write((uint)0);
                writer.Write((uint)0);
                writer.Write((uint)destination.Length);
                writer.Write(destination);

                writer.Flush();

                return output.ToArray();
            }

            var sourceSegments = source.Split((byte)'\n');
            var destinationSegments = destination.Split((byte)'\n');

            // sourceOffsets[k] is the combined length of the first k source segments.
            var sourceOffsets = new List<int> { 0 };
            foreach(var segment in sourceSegments)
            {
                sourceOffsets.Add(sourceOffsets[sourceOffsets.Count - 1] + segment.Length);
            }

            var matchingBlocks =
                new SequenceMatcher<Segment>(sourceSegments, destinationSegments, (left, right) => left.Equals(right))
                    .GetMatchingBlocks();

            // Segment indices just past the previous matching block on each side.
            int sourceCursor = 0, destinationCursor = 0;

            foreach(var block in matchingBlocks)
            {
                int sourceStart = block.SourceIndex;
                int destinationStart = block.DestinationIndex;
                int blockLength = block.Length;

                // Total length of the destination segments lying between the previous
                // matching block and this one (an empty range sums to 0).
                var replacementLength =
                    Enumerable.Range(destinationCursor, destinationStart - destinationCursor)
                              .Sum(index => destinationSegments[index].Length);

                if(sourceStart > sourceCursor || replacementLength > 0)
                {
                    writer.Write((uint)sourceOffsets[sourceCursor]);
                    writer.Write((uint)sourceOffsets[sourceStart]);
                    writer.Write((uint)replacementLength);

                    if(replacementLength > 0)
                    {
                        for(var k = destinationCursor; k < destinationStart; ++k)
                        {
                            writer.Write(destination, destinationSegments[k].Offset, destinationSegments[k].Length);
                        }
                    }
                }

                sourceCursor = sourceStart + blockLength;
                destinationCursor = destinationStart + blockLength;
            }

            writer.Flush();

            return output.ToArray();
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Verifies that <c>MatchPassword</c> reports "abcd" as a single match
        /// spanning indices 0 through 3.
        /// </summary>
        public void SingleSequence()
        {
            var matches = new SequenceMatcher().MatchPassword("abcd").ToList();

            matches.Count.Should().Be(1);

            var match = matches[0];
            match.i.Should().Be(0);
            match.j.Should().Be(3);
            match.Token.Should().Be("abcd");
        }
            /// <summary>
            /// Builds a matcher for the wrapped pattern over <paramref name="list"/> and
            /// calls <c>Matches()</c> on it.
            /// </summary>
            /// <param name="list">Sequence to run the pattern against.</param>
            /// <returns>
            /// A basic snapshot of the match result when <c>Matches()</c> succeeds;
            /// otherwise <c>null</c>.
            /// </returns>
            public override object MatchWithResult(IList <T> list)
            {
                SequenceMatcher <T> matcher = pattern.GetMatcher(list);

                if (!matcher.Matches())
                {
                    return(null);
                }

                return(matcher.ToBasicSequenceMatchResult());
            }
Ejemplo n.º 9
0
            /// <summary>
            /// Searches the elements of a previous match result for the wrapped pattern.
            /// </summary>
            /// <param name="seqMatchResult">Match result whose elements are searched.</param>
            /// <param name="groups">Not used by this implementation.</param>
            /// <returns>
            /// The matcher, positioned on the match it found, or <c>null</c> when
            /// <c>Find()</c> reports no match.
            /// </returns>
            public ISequenceMatchResult <T> Apply(ISequenceMatchResult <T> seqMatchResult, params int[] groups)
            {
                SequenceMatcher <T> nested = pattern.GetMatcher(seqMatchResult.Elements());

                if (!nested.Find())
                {
                    return(null);
                }

                return(nested);
            }
        /// <summary>
        /// Applies each triggered pattern to the sequence on its own and returns, for
        /// every pattern, that pattern's non-overlapping matches.
        /// </summary>
        /// <remarks>
        /// Non-overlap is only enforced within a single pattern: unlike
        /// <c>FindAllNonOverlapping</c>, overlapping matches that come from different
        /// patterns are all kept. The per-pattern results are chained together in the
        /// order the triggered patterns are enumerated.
        /// </remarks>
        /// <param name="elements">input sequence to match against</param>
        /// <returns>iterable of match results that are non-overlapping per pattern</returns>
        public virtual IEnumerable <ISequenceMatchResult <T> > FindAllNonOverlappingMatchesPerPattern <_T0>(IList <_T0> elements)
            where _T0 : T
        {
            ICollection <SequencePattern <T> > triggeredPatterns = GetTriggeredPatterns(elements);
            IList <IEnumerable <ISequenceMatchResult <T> > > perPattern = new List <IEnumerable <ISequenceMatchResult <T> > >(elements.Count);

            foreach (SequencePattern <T> pattern in triggeredPatterns)
            {
                SequenceMatcher <T> matcher = pattern.GetMatcher(elements);
                matcher.SetMatchWithResult(matchWithResult);
                perPattern.Add(matcher.FindAllNonOverlapping());
            }

            return(Iterables.Chain(perPattern));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Verifies that <c>MatchPassword</c> finds both "abcd" (indices 4..7) and
        /// "zyxw" (indices 11..14) inside "asdfabcdhujzyxwhgjj", in that order.
        /// </summary>
        public void MultipleSequence()
        {
            var matches = new SequenceMatcher().MatchPassword("asdfabcdhujzyxwhgjj").ToList();

            matches.Count.Should().Be(2);

            var first = matches[0];
            first.i.Should().Be(4);
            first.j.Should().Be(7);
            first.Token.Should().Be("abcd");

            var second = matches[1];
            second.i.Should().Be(11);
            second.j.Should().Be(14);
            second.Token.Should().Be("zyxw");
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Advances an existing matcher to its next match.
 /// </summary>
 /// <remarks>
 /// Only a result that is itself a <c>SequenceMatcher&lt;T&gt;</c> can be advanced;
 /// any other kind of result (including <c>null</c>) produces <c>null</c>.
 /// </remarks>
 /// <param name="seqMatchResult">Previous match result, expected to be a live matcher.</param>
 /// <param name="groups">Not used by this implementation.</param>
 /// <returns>
 /// The same matcher, positioned on the next match, or <c>null</c> when the
 /// argument is not a <c>SequenceMatcher&lt;T&gt;</c> or <c>Find()</c> reports no
 /// further match.
 /// </returns>
 public ISequenceMatchResult <T> Apply(ISequenceMatchResult <T> seqMatchResult, params int[] groups)
 {
     // Test against the exact closed generic type that is used afterwards. The
     // previous code checked the non-generic SequenceMatcher and then hard-cast to
     // SequenceMatcher<T>, so the check and the cast could disagree and the cast
     // could throw InvalidCastException.
     SequenceMatcher <T> matcher = seqMatchResult as SequenceMatcher <T>;

     if (matcher != null && matcher.Find())
     {
         return(matcher);
     }

     return(null);
 }
        /// <summary>
        /// Runs every triggered pattern over the input sequence and returns a set of
        /// non-overlapping matches chosen for the highest overall score.
        /// </summary>
        /// <remarks>
        /// All matches of all triggered patterns are collected first, each tagged with
        /// the position of its pattern in the triggered collection. Where matches
        /// overlap, the ones kept are those that give the overall maximum score
        /// according to <paramref name="scorer"/>. The selected matches are sorted with
        /// <c>SequenceMatchResultConstants.OffsetComparator</c> before being returned.
        /// </remarks>
        /// <param name="elements">input sequence to match against</param>
        /// <param name="scorer">scorer for scoring each match</param>
        /// <returns>list of match results that are non-overlapping</returns>
        public virtual IList <ISequenceMatchResult <T> > FindNonOverlappingMaxScore <_T0, _T1>(IList <_T0> elements, IToDoubleFunction <_T1> scorer)
            where _T0 : T
        {
            ICollection <SequencePattern <T> > triggeredPatterns = GetTriggeredPatterns(elements);
            IList <ISequenceMatchResult <T> >  candidates        = new List <ISequenceMatchResult <T> >();
            int patternOrder = 0;

            foreach (SequencePattern <T> pattern in triggeredPatterns)
            {
                SequenceMatcher <T> matcher = pattern.GetMatcher(elements);
                matcher.SetMatchWithResult(matchWithResult);
                matcher.SetOrder(patternOrder++);

                // Snapshot each match, since the matcher itself moves on with every Find()
                while (matcher.Find())
                {
                    candidates.Add(matcher.ToBasicSequenceMatchResult());
                }
            }

            IList <ISequenceMatchResult <T> > selected = IntervalTree.GetNonOverlappingMaxScore(candidates, SequenceMatchResultConstants.ToInterval, scorer);
            selected.Sort(SequenceMatchResultConstants.OffsetComparator);
            return(selected);
        }
        /// <summary>
        /// Splits a list of already tokenized words into sentences.
        /// </summary>
        /// <remarks>
        /// Reads through each word in the input and breaks off a sentence after
        /// finding a valid sentence boundary token or reaching the end of the input.
        /// Note that for this to work, the words in the input must have been
        /// tokenized with a tokenizer that makes sentence boundary tokens their own
        /// tokens (e.g.,
        /// <see cref="PTBTokenizer{T}"/>
        /// ).
        /// Depending on the patterns configured on this instance, tokens may also be
        /// skipped (outside a sentence region), discarded, appended to the previous
        /// sentence (boundary followers), or held together until a forced end.
        /// </remarks>
        /// <param name="words">A list of already tokenized words (must implement HasWord or be a String).</param>
        /// <returns>A list of sentences, each a list of the tokens that were kept for it.</returns>
        /// <seealso cref="WordToSentenceProcessor{IN}.WordToSentenceProcessor(string, string, Java.Util.ISet{E}, Java.Util.ISet{E}, string, NewlineIsSentenceBreak, Edu.Stanford.Nlp.Ling.Tokensregex.SequencePattern{T}, Java.Util.ISet{E}, bool, bool)"/>
        private IList <IList <In> > WordsToSentences <_T0>(IList <_T0> words)
            where _T0 : IN
        {
            // Table of tokens (keyed by identity) that end a multi-token boundary match.
            // Stays null unless sentenceBoundaryMultiTokenPattern is set.
            IdentityHashMap <object, bool> isSentenceBoundary = null;

            if (sentenceBoundaryMultiTokenPattern != null)
            {
                // Do initial pass using TokensRegex to identify multi token patterns that need to be matched
                // and add the last token of a match to our table of sentence boundary tokens.
                isSentenceBoundary = new IdentityHashMap <object, bool>();
                SequenceMatcher <In> matcher = sentenceBoundaryMultiTokenPattern.GetMatcher(words);
                while (matcher.Find())
                {
                    IList <In> nodes = matcher.GroupNodes();
                    if (nodes != null && !nodes.IsEmpty())
                    {
                        isSentenceBoundary[nodes[nodes.Count - 1]] = true;
                    }
                }
            }
            // Split tokens into sentences.
            // State carried from one token to the next:
            //   insideRegion          - a token matching sentenceRegionBeginPattern has been seen and no
            //                           token matching sentenceRegionEndPattern has closed it yet
            //   inWaitForForcedEnd    - a token asked (via ForcedSentenceUntilEndAnnotation) to keep the
            //                           sentence open until a forced-end token arrives
            //   lastTokenWasNewline   - the previous token was in sentenceBoundaryToDiscard or equal to
            //                           AbstractTokenizer.NewlineToken
            //   lastSentenceEndForced - the most recent sentence boundary was a forced one
            IList <IList <In> > sentences       = Generics.NewArrayList();
            IList <In>          currentSentence = new List <In>();
            IList <In>          lastSentence    = null;
            bool insideRegion          = false;
            bool inWaitForForcedEnd    = false;
            bool lastTokenWasNewline   = false;
            bool lastSentenceEndForced = false;

            foreach (IN o in words)
            {
                string word      = GetString(o);
                bool   forcedEnd = IsForcedEndToken(o);
                // if (DEBUG) { if (forcedEnd) { log.info("Word is " + word + "; marks forced end of sentence [cont.]"); } }
                bool inMultiTokenExpr = false;
                bool discardToken     = false;
                if (o is ICoreMap)
                {
                    // Hacky stuff to ensure sentence breaks do not happen in certain cases
                    ICoreMap cm = (ICoreMap)o;
                    if (!forcedEnd)
                    {
                        // NOTE(review): forcedUntilEndValue is declared as a non-nullable bool, so the
                        // "!= null" test below cannot fail; this looks like a leftover from a nullable
                        // type in the code this was ported from. Confirm what Get returns when the
                        // annotation is absent.
                        bool forcedUntilEndValue = cm.Get(typeof(CoreAnnotations.ForcedSentenceUntilEndAnnotation));
                        if (forcedUntilEndValue != null && forcedUntilEndValue)
                        {
                            // if (DEBUG) { log.info("Word is " + word + "; starting wait for forced end of sentence [cont.]"); }
                            inWaitForForcedEnd = true;
                        }
                        else
                        {
                            MultiTokenTag mt = cm.Get(typeof(CoreAnnotations.MentionTokenAnnotation));
                            if (mt != null && !mt.IsEnd())
                            {
                                // In the middle of a multi token mention, make sure sentence is not ended here
                                // if (DEBUG) { log.info("Word is " + word + "; inside multi-token mention [cont.]"); }
                                inMultiTokenExpr = true;
                            }
                        }
                    }
                }
                if (tokenPatternsToDiscard != null)
                {
                    discardToken = MatchesTokenPatternsToDiscard(word);
                }
                // When a region-begin pattern is configured and we are not inside a region, the token
                // is always skipped; a token matching the begin pattern opens the region for the
                // tokens that follow it.
                if (sentenceRegionBeginPattern != null && !insideRegion)
                {
                    if (sentenceRegionBeginPattern.Matcher(word).Matches())
                    {
                        insideRegion = true;
                    }
                    lastTokenWasNewline = false;
                    continue;
                }
                // A token matching sentenceBoundaryFollowersPattern that arrives directly after a
                // non-forced sentence end (nothing collected yet, no newline in between) is attached
                // to the previous sentence instead of starting a new one.
                if (!lastSentenceEndForced && lastSentence != null && currentSentence.IsEmpty() && !lastTokenWasNewline && sentenceBoundaryFollowersPattern.Matcher(word).Matches())
                {
                    if (!discardToken)
                    {
                        lastSentence.Add(o);
                    }
                    lastTokenWasNewline = false;
                    continue;
                }
                // newSentForced: end the sentence here and mark the boundary as forced.
                // newSent: end the sentence here at an ordinary boundary token.
                bool   newSentForced = false;
                bool   newSent       = false;
                // NOTE(review): debugText is assigned but never read in this method; presumably it
                // fed debug logging that is no longer present.
                string debugText     = (discardToken) ? "discarded" : "added to current";
                if (inWaitForForcedEnd && !forcedEnd)
                {
                    // Still waiting for the forced end: keep collecting, never break here.
                    if (sentenceBoundaryToDiscard.Contains(word))
                    {
                        // there can be newlines even in something to keep together
                        discardToken = true;
                    }
                    if (!discardToken)
                    {
                        currentSentence.Add(o);
                    }
                }
                else
                {
                    if (inMultiTokenExpr && !forcedEnd)
                    {
                        // Inside a multi-token mention: collect the token, never break here.
                        if (!discardToken)
                        {
                            currentSentence.Add(o);
                        }
                    }
                    else
                    {
                        if (sentenceBoundaryToDiscard.Contains(word))
                        {
                            // Boundary tokens that are dropped: the token is never added to the
                            // sentence; whether it ends the sentence depends on forcedEnd and on the
                            // newlineIsSentenceBreak setting.
                            if (forcedEnd)
                            {
                                // sentence boundary can easily be forced end
                                inWaitForForcedEnd = false;
                                newSentForced      = true;
                            }
                            else
                            {
                                if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.Always)
                                {
                                    newSentForced = true;
                                }
                                else
                                {
                                    if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.TwoConsecutive && lastTokenWasNewline)
                                    {
                                        newSentForced = true;
                                    }
                                }
                            }
                            lastTokenWasNewline = true;
                        }
                        else
                        {
                            lastTokenWasNewline = false;
                            bool isb;
                            if (xmlBreakElementsToDiscard != null && MatchesXmlBreakElementToDiscard(word))
                            {
                                // XML break element: forces a boundary; the token itself is not added.
                                newSentForced = true;
                            }
                            else
                            {
                                if (sentenceRegionEndPattern != null && sentenceRegionEndPattern.Matcher(word).Matches())
                                {
                                    // Region end marker: closes the region and forces a boundary; the
                                    // token itself is not added.
                                    insideRegion  = false;
                                    newSentForced = true;
                                }
                                else
                                {
                                    // Marked sentence boundaries
                                    // NOTE(review): isb is a non-nullable bool, so the "!= null" part of
                                    // the test cannot fail; confirm how the map lookup behaves for a
                                    // token that was never added to the table.
                                    if ((isSentenceBoundary != null) && ((isb = isSentenceBoundary[o]) != null) && isb)
                                    {
                                        if (!discardToken)
                                        {
                                            currentSentence.Add(o);
                                        }
                                        newSent = true;
                                    }
                                    else
                                    {
                                        if (sentenceBoundaryTokenPattern.Matcher(word).Matches())
                                        {
                                            // Ordinary boundary token: kept in the sentence it ends.
                                            if (!discardToken)
                                            {
                                                currentSentence.Add(o);
                                            }
                                            newSent = true;
                                        }
                                        else
                                        {
                                            if (forcedEnd)
                                            {
                                                // Forced-end token: kept in the sentence, then a forced boundary.
                                                if (!discardToken)
                                                {
                                                    currentSentence.Add(o);
                                                }
                                                inWaitForForcedEnd = false;
                                                newSentForced      = true;
                                            }
                                            else
                                            {
                                                // Plain token: just collect it.
                                                if (!discardToken)
                                                {
                                                    currentSentence.Add(o);
                                                }
                                                // chris added this next test in 2017; a bit weird, but KBP setup doesn't have newline in sentenceBoundary patterns, just in toDiscard
                                                if (AbstractTokenizer.NewlineToken.Equals(word))
                                                {
                                                    lastTokenWasNewline = true;
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                if ((newSentForced || newSent) && (!currentSentence.IsEmpty() || allowEmptySentences))
                {
                    // adds this sentence now that it's complete
                    sentences.Add(currentSentence);
                    // A forced end that was pending is carried over only while the previous sentence
                    // is missing or empty; otherwise the flag reflects this boundary alone.
                    lastSentenceEndForced = ((lastSentence == null || lastSentence.IsEmpty()) && lastSentenceEndForced) || newSentForced;
                    lastSentence          = currentSentence;
                    // clears the current sentence by starting a fresh list
                    currentSentence       = new List <In>();
                }
                else
                {
                    // No sentence was emitted (no boundary, or an empty sentence that is not
                    // allowed); still remember that a forced boundary was seen.
                    if (newSentForced)
                    {
                        lastSentenceEndForced = true;
                    }
                }
            }
            // add any words at the end, even if there isn't a sentence
            // terminator at the end of file
            if (!currentSentence.IsEmpty())
            {
                // adds last sentence
                sentences.Add(currentSentence);
            }
            return(sentences);
        }