public void SequenceMatcher()
{
    var matcher = new SequenceMatcher();

    // A lone ascending run is reported as a single match spanning the whole input.
    var single = matcher.MatchPassword("abcd").ToList();
    Assert.AreEqual(1, single.Count);
    var only = single[0];
    Assert.AreEqual(0, only.Begin);
    Assert.AreEqual(3, only.End);
    Assert.AreEqual("abcd", only.Token);

    // Two separate runs (ascending "abcd", descending "zyxw") embedded in noise.
    var multiple = matcher.MatchPassword("asdfabcdhujzyxwhgjj").ToList();
    Assert.AreEqual(2, multiple.Count);
    Assert.AreEqual(4, multiple[0].Begin);
    Assert.AreEqual(7, multiple[0].End);
    Assert.AreEqual("abcd", multiple[0].Token);
    Assert.AreEqual(11, multiple[1].Begin);
    Assert.AreEqual(14, multiple[1].End);
    Assert.AreEqual("zyxw", multiple[1].Token);

    // No runs at all -> no matches.
    Assert.AreEqual(0, matcher.MatchPassword("dfsjkhfjksdh").Count());
}
public static void Main(string[] args)
{
    // Seed the tree with "1" (2^0) and every power of two up to 2^800,
    // stored as decimal strings for prefix matching.
    var tree = new TernarySearchTree <char>();
    tree.Add("1");
    var power = new BigInteger(1);
    for (var exp = 1; exp <= 800; exp++)
    {
        power *= 2;
        tree.Add(power.ToString());
    }

    // For each query line, count how many tree entries match starting at
    // every position of the input string.
    var queries = Convert.ToInt32(Console.ReadLine());
    for (var q = 0; q < queries; ++q)
    {
        var line = Console.ReadLine();
        var total = 0;
        for (var start = 0; start < line.Length; ++start)
        {
            total += tree.Match(new SequenceMatcher(line, start)).Count();
        }
        Console.WriteLine(total);
    }
}
/// <summary>
/// Given a sequence, applies our patterns over the sequence and returns
/// all non overlapping matches.
/// </summary>
/// <remarks>
/// Given a sequence, applies our patterns over the sequence and returns
/// all non overlapping matches. When multiple patterns overlaps,
/// matched patterns are selected by order specified by the comparator
/// </remarks>
/// <param name="elements">input sequence to match against</param>
/// <param name="cmp">comparator indicating order that overlapped sequences should be selected.</param>
/// <returns>list of match results that are non-overlapping</returns>
public virtual IList <ISequenceMatchResult <T> > FindNonOverlapping <_T0, _T1>(IList <_T0> elements, IComparator <_T1> cmp)
    where _T0 : T
{
    ICollection <SequencePattern <T> > candidates = GetTriggeredPatterns(elements);
    IList <ISequenceMatchResult <T> > collected = new List <ISequenceMatchResult <T> >();
    int order = 0;
    foreach (SequencePattern <T> pattern in candidates)
    {
        // Allow interrupting between patterns (this can be a long-running scan).
        if (Thread.Interrupted())
        {
            throw new RuntimeInterruptedException();
        }
        SequenceMatcher <T> matcher = pattern.GetMatcher(elements);
        matcher.SetMatchWithResult(matchWithResult);
        matcher.SetOrder(order);
        // Collect every match this pattern produces; overlaps are resolved below.
        while (matcher.Find())
        {
            collected.Add(matcher.ToBasicSequenceMatchResult());
        }
        order++;
    }
    // Resolve overlaps using the caller-supplied comparator, then return in offset order.
    IList <ISequenceMatchResult <T> > nonOverlapping = IntervalTree.GetNonOverlapping(collected, SequenceMatchResultConstants.ToInterval, cmp);
    nonOverlapping.Sort(SequenceMatchResultConstants.OffsetComparator);
    return(nonOverlapping);
}
public void NoSequence()
{
    // A string containing no ascending/descending runs yields no matches.
    var matches = new SequenceMatcher().MatchPassword("dfsjkhfjksdh").ToList();

    matches.Should().BeEmpty();
}
public static BDiffBlock[] GetBlocks(byte[] source, byte[] destination)
{
    // Split both inputs on newline boundaries and diff them segment-by-segment.
    var sourceLines = source.Split((byte)'\n');
    var destinationLines = destination.Split((byte)'\n');
    var matcher = new SequenceMatcher<Segment>(sourceLines, destinationLines, (l, r) => l.Equals(r));

    // Each matching run becomes a block of [start, end) index pairs on both sides.
    return matcher
        .GetMatchingBlocks()
        .Select(m => new BDiffBlock(m.SourceIndex, m.SourceIndex + m.Length, m.DestinationIndex, m.DestinationIndex + m.Length))
        .ToArray();
}
// Computes a binary diff of two byte buffers, line-oriented (split on '\n').
// The output is a sequence of big-endian records; with an empty/null source,
// a single record (0, 0, destination.Length, destination bytes) is emitted.
// NOTE(review): the exact record layout appears to be
// (source copy-start offset, source copy-end offset, literal length, literal bytes)
// — confirm against the corresponding patch/apply routine.
public static byte[] Diff(byte[] source, byte[] destination)
{
    var ms = new MemoryStream();
    var bw = new BigEndianBinaryWriter(ms);
    // Degenerate case: nothing to diff against — emit the destination verbatim.
    if(source == null || source.Length == 0)
    {
        bw.Write((uint)0);
        bw.Write((uint)0);
        bw.Write((uint)destination.Length);
        bw.Write(destination);
        bw.Flush();
        return ms.ToArray();
    } // if
    // Split both buffers into newline-delimited segments.
    var a = source.Split((byte)'\n');
    var b = destination.Split((byte)'\n');
    // p[i] = byte offset of source segment i (prefix sums of segment lengths).
    var p = new List<int> { 0 };
    Array.ForEach(a, s => p.Add(p[p.Count - 1] + s.Length));
    // Matching runs between the two segment lists.
    var d = new SequenceMatcher<Segment>(a, b, (l, r) => l.Equals(r)).GetMatchingBlocks();
    // la/lb: index of the first segment not yet emitted on the source/destination side.
    int la = 0, lb = 0;
    foreach(var x in d)
    {
        int am = x.SourceIndex, bm = x.DestinationIndex, size = x.Length;
        // sz = total bytes of destination segments between the previous match and this
        // one, i.e. the literal data that must be written into the diff.
        var sz = (lb == bm && lb == 0) ? 0 : Enumerable.Range(lb, bm - lb).Select(i => b[i]).Sum(w => w.Length);
        // Emit a record only when there is a gap on either side.
        if(am > la || sz > 0)
        {
            bw.Write((uint)p[la]);
            bw.Write((uint)p[am]);
            bw.Write((uint)sz);
            if(sz > 0)
            {
                // Copy the unmatched destination segments verbatim.
                for(var z = lb; z < bm; ++z) bw.Write(destination, b[z].Offset, b[z].Length);
            } // if
        } // if
        // Advance both cursors past the matched run.
        la = am + size;
        lb = bm + size;
    } // foreach
    bw.Flush();
    return ms.ToArray();
}
public void SingleSequence()
{
    // "abcd" is a single full ascending run covering indices 0..3.
    var matches = new SequenceMatcher().MatchPassword("abcd").ToList();

    matches.Count.Should().Be(1);
    var match = matches[0];
    match.i.Should().Be(0);
    match.j.Should().Be(3);
    match.Token.Should().Be("abcd");
}
// Attempts to match the pattern against the entire list; returns the basic
// match result on success, or null when the list does not match.
public override object MatchWithResult(IList <T> list)
{
    SequenceMatcher <T> matcher = pattern.GetMatcher(list);
    return matcher.Matches() ? (object)matcher.ToBasicSequenceMatchResult() : null;
}
// Re-runs the pattern over the elements of a previous match result.
// Returns the live matcher positioned at the first find, or null if nothing matches.
public ISequenceMatchResult <T> Apply(ISequenceMatchResult <T> seqMatchResult, params int[] groups)
{
    SequenceMatcher <T> matcher = pattern.GetMatcher(seqMatchResult.Elements());
    return matcher.Find() ? matcher : null;
}
/// <summary>
/// Given a sequence, applies each of our patterns over the sequence and returns
/// all non overlapping matches for each of the patterns.
/// </summary>
/// <remarks>
/// Given a sequence, applies each of our patterns over the sequence and returns
/// all non overlapping matches for each of the patterns.
/// Unlike #findAllNonOverlapping, overlapping matches from different patterns are kept.
/// </remarks>
/// <param name="elements">input sequence to match against</param>
/// <returns>iterable of match results that are non-overlapping</returns>
public virtual IEnumerable <ISequenceMatchResult <T> > FindAllNonOverlappingMatchesPerPattern <_T0>(IList <_T0> elements)
    where _T0 : T
{
    ICollection <SequencePattern <T> > triggered = GetTriggeredPatterns(elements);
    // One (lazy) match sequence is added per triggered pattern, so size the list
    // to the pattern count. (Fix: the capacity was previously elements.Count,
    // which sized it to the wrong collection.)
    IList <IEnumerable <ISequenceMatchResult <T> > > allMatches = new List <IEnumerable <ISequenceMatchResult <T> > >(triggered.Count);
    foreach (SequencePattern <T> p in triggered)
    {
        SequenceMatcher <T> m = p.GetMatcher(elements);
        m.SetMatchWithResult(matchWithResult);
        // Non-overlapping within one pattern; overlaps across patterns are preserved.
        IEnumerable <ISequenceMatchResult <T> > matches = m.FindAllNonOverlapping();
        allMatches.Add(matches);
    }
    return(Iterables.Chain(allMatches));
}
public void MultipleSequence()
{
    // Two runs buried in noise: ascending "abcd" at 4..7 and descending "zyxw" at 11..14.
    var matches = new SequenceMatcher().MatchPassword("asdfabcdhujzyxwhgjj").ToList();

    matches.Count.Should().Be(2);

    var first = matches[0];
    first.i.Should().Be(4);
    first.j.Should().Be(7);
    first.Token.Should().Be("abcd");

    var second = matches[1];
    second.i.Should().Be(11);
    second.j.Should().Be(14);
    second.Token.Should().Be("zyxw");
}
// Resumes an in-progress matcher: if the supplied result is itself a live
// SequenceMatcher, advance it to the next match and return it; otherwise null.
// Fix: the original tested `is SequenceMatcher` (non-generic) and then
// hard-cast to SequenceMatcher<T>, which would throw InvalidCastException for
// a matcher of a different element type; the generic type pattern both tests
// and casts safely in one step.
public ISequenceMatchResult <T> Apply(ISequenceMatchResult <T> seqMatchResult, params int[] groups)
{
    if (seqMatchResult is SequenceMatcher <T> matcher && matcher.Find())
    {
        return matcher;
    }
    return null;
}
/// <summary>
/// Given a sequence, applies our patterns over the sequence and returns
/// all non overlapping matches.
/// </summary>
/// <remarks>
/// Given a sequence, applies our patterns over the sequence and returns
/// all non overlapping matches. When multiple patterns overlaps,
/// matched patterns are selected to give the overall maximum score.
/// </remarks>
/// <param name="elements">input sequence to match against</param>
/// <param name="scorer">scorer for scoring each match</param>
/// <returns>list of match results that are non-overlapping</returns>
public virtual IList <ISequenceMatchResult <T> > FindNonOverlappingMaxScore <_T0, _T1>(IList <_T0> elements, IToDoubleFunction <_T1> scorer)
    where _T0 : T
{
    ICollection <SequencePattern <T> > candidates = GetTriggeredPatterns(elements);
    IList <ISequenceMatchResult <T> > collected = new List <ISequenceMatchResult <T> >();
    int order = 0;
    foreach (SequencePattern <T> pattern in candidates)
    {
        SequenceMatcher <T> matcher = pattern.GetMatcher(elements);
        matcher.SetMatchWithResult(matchWithResult);
        matcher.SetOrder(order);
        // Gather every match for this pattern; overlap resolution happens afterwards.
        while (matcher.Find())
        {
            collected.Add(matcher.ToBasicSequenceMatchResult());
        }
        order++;
    }
    // Pick the overlap-free subset with the maximum total score, sorted by offset.
    IList <ISequenceMatchResult <T> > selected = IntervalTree.GetNonOverlappingMaxScore(collected, SequenceMatchResultConstants.ToInterval, scorer);
    selected.Sort(SequenceMatchResultConstants.OffsetComparator);
    return(selected);
}
/// <summary>
/// Returns a List of Lists where each element is built from a run
/// of Words in the input Document.
/// </summary>
/// <remarks>
/// Returns a List of Lists where each element is built from a run
/// of Words in the input Document. Specifically, reads through each word in
/// the input document and breaks off a sentence after finding a valid
/// sentence boundary token or end of file.
/// Note that for this to work, the words in the
/// input document must have been tokenized with a tokenizer that makes
/// sentence boundary tokens their own tokens (e.g.,
/// <see cref="PTBTokenizer{T}"/>
/// ).
/// </remarks>
/// <param name="words">A list of already tokenized words (must implement HasWord or be a String).</param>
/// <returns>A list of sentences.</returns>
/// <seealso cref="WordToSentenceProcessor{IN}.WordToSentenceProcessor(string, string, Java.Util.ISet{E}, Java.Util.ISet{E}, string, NewlineIsSentenceBreak, Edu.Stanford.Nlp.Ling.Tokensregex.SequencePattern{T}, Java.Util.ISet{E}, bool, bool)"/>
private IList <IList <In> > WordsToSentences <_T0>(IList <_T0> words)
    where _T0 : IN
{
    // is null unless used by sentenceBoundaryMultiTokenPattern
    IdentityHashMap <object, bool> isSentenceBoundary = null;
    if (sentenceBoundaryMultiTokenPattern != null)
    {
        // Do initial pass using TokensRegex to identify multi token patterns that need to be matched
        // and add the last token of a match to our table of sentence boundary tokens.
        isSentenceBoundary = new IdentityHashMap <object, bool>();
        SequenceMatcher <In> matcher = sentenceBoundaryMultiTokenPattern.GetMatcher(words);
        while (matcher.Find())
        {
            IList <In> nodes = matcher.GroupNodes();
            if (nodes != null && !nodes.IsEmpty())
            {
                // Identity keyed: the exact last token object of the match ends a sentence.
                isSentenceBoundary[nodes[nodes.Count - 1]] = true;
            }
        }
    }
    // Split tokens into sentences!!!
    IList <IList <In> > sentences = Generics.NewArrayList();
    IList <In> currentSentence = new List <In>();
    IList <In> lastSentence = null;
    bool insideRegion = false;           // inside a sentenceRegionBegin/EndPattern-delimited region
    bool inWaitForForcedEnd = false;     // suppress breaks until a forced-end token appears
    bool lastTokenWasNewline = false;
    bool lastSentenceEndForced = false;
    foreach (IN o in words)
    {
        string word = GetString(o);
        bool forcedEnd = IsForcedEndToken(o);
        // if (DEBUG) { if (forcedEnd) { log.info("Word is " + word + "; marks forced end of sentence [cont.]"); } }
        bool inMultiTokenExpr = false;
        bool discardToken = false;
        if (o is ICoreMap)
        {
            // Hacky stuff to ensure sentence breaks do not happen in certain cases
            ICoreMap cm = (ICoreMap)o;
            if (!forcedEnd)
            {
                // NOTE(review): bool compared against null below — this is a Java
                // Boolean (three-valued) port artifact; confirm the C# shim's semantics.
                bool forcedUntilEndValue = cm.Get(typeof(CoreAnnotations.ForcedSentenceUntilEndAnnotation));
                if (forcedUntilEndValue != null && forcedUntilEndValue)
                {
                    // if (DEBUG) { log.info("Word is " + word + "; starting wait for forced end of sentence [cont.]"); }
                    inWaitForForcedEnd = true;
                }
                else
                {
                    MultiTokenTag mt = cm.Get(typeof(CoreAnnotations.MentionTokenAnnotation));
                    if (mt != null && !mt.IsEnd())
                    {
                        // In the middle of a multi token mention, make sure sentence is not ended here
                        // if (DEBUG) { log.info("Word is " + word + "; inside multi-token mention [cont.]"); }
                        inMultiTokenExpr = true;
                    }
                }
            }
        }
        if (tokenPatternsToDiscard != null)
        {
            discardToken = MatchesTokenPatternsToDiscard(word);
        }
        // Outside a region: only look for the region-begin marker, emit nothing.
        if (sentenceRegionBeginPattern != null && !insideRegion)
        {
            if (sentenceRegionBeginPattern.Matcher(word).Matches())
            {
                insideRegion = true;
            }
            lastTokenWasNewline = false;
            continue;
        }
        // Boundary followers (e.g. closing quotes/brackets) attach to the PREVIOUS
        // sentence rather than starting a new one.
        if (!lastSentenceEndForced && lastSentence != null && currentSentence.IsEmpty() && !lastTokenWasNewline && sentenceBoundaryFollowersPattern.Matcher(word).Matches())
        {
            if (!discardToken)
            {
                lastSentence.Add(o);
            }
            lastTokenWasNewline = false;
            continue;
        }
        bool newSentForced = false;
        bool newSent = false;
        // NOTE(review): debugText is only consumed by (commented-out) debug logging.
        string debugText = (discardToken) ? "discarded" : "added to current";
        if (inWaitForForcedEnd && !forcedEnd)
        {
            if (sentenceBoundaryToDiscard.Contains(word))
            {
                // there can be newlines even in something to keep together
                discardToken = true;
            }
            if (!discardToken)
            {
                currentSentence.Add(o);
            }
        }
        else
        {
            if (inMultiTokenExpr && !forcedEnd)
            {
                // Never break inside a multi-token mention.
                if (!discardToken)
                {
                    currentSentence.Add(o);
                }
            }
            else
            {
                if (sentenceBoundaryToDiscard.Contains(word))
                {
                    // Discardable boundary token (typically a newline).
                    if (forcedEnd)
                    {
                        // sentence boundary can easily be forced end
                        inWaitForForcedEnd = false;
                        newSentForced = true;
                    }
                    else
                    {
                        if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.Always)
                        {
                            newSentForced = true;
                        }
                        else
                        {
                            if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.TwoConsecutive && lastTokenWasNewline)
                            {
                                newSentForced = true;
                            }
                        }
                    }
                    lastTokenWasNewline = true;
                }
                else
                {
                    lastTokenWasNewline = false;
                    // NOTE(review): isb is compared to null — Java Boolean port artifact, as above.
                    bool isb;
                    if (xmlBreakElementsToDiscard != null && MatchesXmlBreakElementToDiscard(word))
                    {
                        newSentForced = true;
                    }
                    else
                    {
                        if (sentenceRegionEndPattern != null && sentenceRegionEndPattern.Matcher(word).Matches())
                        {
                            insideRegion = false;
                            newSentForced = true;
                        }
                        else
                        {
                            // Marked sentence boundaries (from the multi-token pre-pass above)
                            if ((isSentenceBoundary != null) && ((isb = isSentenceBoundary[o]) != null) && isb)
                            {
                                if (!discardToken)
                                {
                                    currentSentence.Add(o);
                                }
                                newSent = true;
                            }
                            else
                            {
                                if (sentenceBoundaryTokenPattern.Matcher(word).Matches())
                                {
                                    // Ordinary boundary token (., !, ? ...): keep it, end the sentence.
                                    if (!discardToken)
                                    {
                                        currentSentence.Add(o);
                                    }
                                    newSent = true;
                                }
                                else
                                {
                                    if (forcedEnd)
                                    {
                                        if (!discardToken)
                                        {
                                            currentSentence.Add(o);
                                        }
                                        inWaitForForcedEnd = false;
                                        newSentForced = true;
                                    }
                                    else
                                    {
                                        // Plain token: just accumulate it.
                                        if (!discardToken)
                                        {
                                            currentSentence.Add(o);
                                        }
                                        // chris added this next test in 2017; a bit weird, but KBP setup doesn't have newline in sentenceBoundary patterns, just in toDiscard
                                        if (AbstractTokenizer.NewlineToken.Equals(word))
                                        {
                                            lastTokenWasNewline = true;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        if ((newSentForced || newSent) && (!currentSentence.IsEmpty() || allowEmptySentences))
        {
            // adds this sentence now that it's complete
            sentences.Add(currentSentence);
            lastSentenceEndForced = ((lastSentence == null || lastSentence.IsEmpty()) && lastSentenceEndForced) || newSentForced;
            lastSentence = currentSentence;
            // clears the current sentence
            currentSentence = new List <In>();
        }
        else
        {
            if (newSentForced)
            {
                lastSentenceEndForced = true;
            }
        }
    }
    // add any words at the end, even if there isn't a sentence
    // terminator at the end of file
    if (!currentSentence.IsEmpty())
    {
        // adds last sentence
        sentences.Add(currentSentence);
    }
    return(sentences);
}