/// <summary>
/// Runs the right-hand side of the rule for every match the matcher reports on
/// <paramref name="input"/> from <paramref name="start"/>, collecting one result per match.
/// </summary>
/// <param name="input">The data to match against.</param>
/// <param name="start">The offset passed to the matcher as the starting point.</param>
/// <returns>The values returned by <c>RuleSpec.ApplyRhs</c>, in match order (empty when there are no matches).</returns>
protected override IEnumerable<TData> ApplyImpl(TData input, TOffset start)
{
    var outputs = new List<TData>();
    foreach (Match<TData, TOffset> current in Matcher.AllMatches(input, start))
    {
        outputs.Add(RuleSpec.ApplyRhs(this, current));
    }

    return outputs;
}
/// <summary>
/// Runs the right-hand side of the rule for every match found on <paramref name="input"/>
/// from <paramref name="start"/> and returns only the output of the last application
/// (or <paramref name="input"/> itself when nothing matched).
/// </summary>
/// <param name="input">The data to match against.</param>
/// <param name="start">The offset passed to the matcher as the starting point.</param>
/// <returns>The final output value wrapped by <c>ToEnumerable()</c>.</returns>
protected override IEnumerable<TData> ApplyImpl(TData input, TOffset start)
{
    // The full match list is materialized before any right-hand side runs.
    // NOTE(review): presumably because ApplyRhs may modify the data being matched - confirm.
    var snapshot = Matcher.AllMatches(input, start).ToArray();

    TData latest = input;
    foreach (Match<TData, TOffset> current in snapshot)
    {
        // Each application overwrites the previous output; only the last one survives.
        RuleSpec.ApplyRhs(this, current, out latest);
    }

    return latest.ToEnumerable();
}
/// <summary>
/// Runs the right-hand side of the rule for every match the matcher reports on
/// <paramref name="input"/> from <paramref name="start"/>, collecting the output of each application.
/// </summary>
/// <param name="input">The data to match against.</param>
/// <param name="start">The offset passed to the matcher as the starting point.</param>
/// <returns>One output value per match, in match order (empty when there are no matches).</returns>
protected override IEnumerable<TData> ApplyImpl(TData input, TOffset start)
{
    var outputs = new List<TData>();
    foreach (Match<TData, TOffset> current in Matcher.AllMatches(input, start))
    {
        TData produced;
        RuleSpec.ApplyRhs(this, current, out produced);
        outputs.Add(produced);
    }

    return outputs;
}
/// <summary>
/// Applies the rule to <paramref name="input"/> in two passes: first every target match whose
/// subrule also matches is recorded, then the recorded subrules' right-hand sides are applied.
/// </summary>
/// <param name="input">The word to match against and to hand to the subrule right-hand sides.</param>
/// <returns><paramref name="input"/> wrapped by <c>ToEnumerable()</c>.</returns>
public override IEnumerable<Word> Apply(Word input)
{
    // Pass 1: gather (target match, subrule match) pairs without applying anything yet.
    var pending = new List<Tuple<Match<Word, ShapeNode>, PhonologicalSubruleMatch>>();
    foreach (Match<Word, ShapeNode> targetMatch in Matcher.AllMatches(input))
    {
        PhonologicalSubruleMatch subruleMatch;
        if (!_ruleSpec.MatchSubrule(this, targetMatch, out subruleMatch))
        {
            continue;
        }

        pending.Add(Tuple.Create(targetMatch, subruleMatch));
    }

    // Pass 2: apply the right-hand side of each recorded subrule, in the order collected.
    foreach (Tuple<Match<Word, ShapeNode>, PhonologicalSubruleMatch> entry in pending)
    {
        Match<Word, ShapeNode> target = entry.Item1;
        PhonologicalSubruleMatch subrule = entry.Item2;
        subrule.SubruleSpec.ApplyRhs(target, subrule.Range, subrule.VariableBindings);
    }

    return input.ToEnumerable();
}
/// <summary>
/// Matches a pattern made of two named subpatterns ("unvoiceInitial" and "word") against a
/// sample sentence and checks IsMatch, Match, Matches and AllMatches with no explicit start
/// offset and with start offsets 19 and 29, including the subpattern name reported in
/// PatternPath[0] for selected matches.
/// </summary>
public void Subpattern()
{
    AnnotatedStringData text = CreateStringData("the old, angry man slept well.");

    Pattern<AnnotatedStringData, int> pattern = Pattern<AnnotatedStringData, int>.New()
        .Subpattern("unvoiceInitial", unvoiceInitial => unvoiceInitial
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("voice-").Symbol("son-").Value)
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).ZeroOrMore
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value))
        .Subpattern("word", word => word
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).ZeroOrMore
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value)
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).ZeroOrMore).Value;

    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    // IsMatch
    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 19));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    // Match
    Match<AnnotatedStringData, int> hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 3), hit.Span);
    Assert.AreEqual("unvoiceInitial", hit.PatternPath[0]);

    hit = matcher.Match(text, 19);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(19, 22), hit.Span);
    Assert.AreEqual("unvoiceInitial", hit.PatternPath[0]);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    // Matches
    Match<AnnotatedStringData, int>[] hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(6, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual("unvoiceInitial", hits[0].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(4, 7), hits[1].Span);
    Assert.AreEqual("word", hits[1].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[5].Span);
    Assert.AreEqual("word", hits[5].PatternPath[0]);

    hits = matcher.Matches(text, 19).ToArray();
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(SpanFactory.Create(19, 22), hits[0].Span);
    Assert.AreEqual("unvoiceInitial", hits[0].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[1].Span);
    Assert.AreEqual("word", hits[1].PatternPath[0]);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // AllMatches
    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(32, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual("unvoiceInitial", hits[0].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[1].Span);
    Assert.AreEqual("word", hits[1].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[2].Span);
    Assert.AreEqual("word", hits[2].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(16, 17), hits[15].Span);
    Assert.AreEqual("word", hits[15].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(26, 27), hits[31].Span);
    Assert.AreEqual("word", hits[31].PatternPath[0]);

    hits = matcher.AllMatches(text, 19).ToArray();
    Assert.AreEqual(16, hits.Length);
    Assert.AreEqual(SpanFactory.Create(19, 22), hits[0].Span);
    Assert.AreEqual("unvoiceInitial", hits[0].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(19, 24), hits[1].Span);
    Assert.AreEqual("word", hits[1].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(19, 23), hits[2].Span);
    Assert.AreEqual("word", hits[2].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(21, 24), hits[7].Span);
    Assert.AreEqual("word", hits[7].PatternPath[0]);
    Assert.AreEqual(SpanFactory.Create(26, 27), hits[15].Span);
    Assert.AreEqual("word", hits[15].PatternPath[0]);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Checks IsMatch, Match, Matches and AllMatches for two patterns over a sample sentence:
/// first a single "syl-" segment annotation, then (after word-level and NP/VP annotations
/// have been added to the sentence) a "syl-" segment followed by a "syl+" segment.
/// Each query is made with no explicit start offset and with start offsets 7 and 29.
/// </summary>
public void SimplePattern()
{
    AnnotatedStringData text = CreateStringData("the old, angry man slept well.");

    // ---- Pattern 1: a single "syl-" segment ----
    Pattern<AnnotatedStringData, int> pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).Value;

    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    Match<AnnotatedStringData, int> hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 1), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(10, 11), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    Match<AnnotatedStringData, int>[] hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(17, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 1), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(13, 14), hits[7].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[16].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(13, hits.Length);
    Assert.AreEqual(SpanFactory.Create(10, 11), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(17, 18), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[12].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(17, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 1), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(13, 14), hits[7].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[16].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(13, hits.Length);
    Assert.AreEqual(SpanFactory.Create(10, 11), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(17, 18), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[12].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // ---- Add word-level and phrase-level annotations on top of the segments ----
    text.Annotations.Add(0, 3, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value);
    text.Annotations.Add(4, 7, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(9, 14, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(15, 18, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);
    text.Annotations.Add(19, 24, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("verb").Value);
    text.Annotations.Add(25, 29, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adv").Value);
    text.Annotations.Add(0, 18, FeatureStruct.New().Symbol(NP).Value);
    text.Annotations.Add(19, 29, FeatureStruct.New().Symbol(VP).Value);

    // ---- Pattern 2: a "syl-" segment followed by a "syl+" segment ----
    pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value).Value;

    matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(1, 3), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(15, 17), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(4, hits.Length);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 17), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(25, 27), hits[3].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(SpanFactory.Create(15, 17), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(20, 22), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(25, 27), hits[2].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(4, hits.Length);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 17), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(25, 27), hits[3].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(SpanFactory.Create(15, 17), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(20, 22), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(25, 27), hits[2].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Checks IsMatch, Match, Matches and AllMatches for three quantified patterns over a sample
/// sentence: a segment followed by Range(0, 2) segments, then (after word-level and NP/VP
/// annotations have been added) a segment followed by Range(1, 3) segments, and finally the
/// same shape with LazyRange(1, 3). Queries use no explicit start offset and offsets 7 and 29.
/// </summary>
public void RangePattern()
{
    AnnotatedStringData text = CreateStringData("the old, angry man slept well.");

    // ---- Pattern 1: segment + Range(0, 2) segments ----
    Pattern<AnnotatedStringData, int> pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value).Range(0, 2).Value;

    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    Match<AnnotatedStringData, int> hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 3), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 12), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    Match<AnnotatedStringData, int>[] hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(9, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[8].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(7, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 12), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(19, 22), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[6].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(51, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(0, 2), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(0, 1), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(1, 2), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(2, 3), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(9, 10), hits[14].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[50].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(39, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 12), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(9, 11), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(9, 10), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(13, 14), hits[11].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[38].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // ---- Add word-level and phrase-level annotations on top of the segments ----
    text.Annotations.Add(0, 3, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value);
    text.Annotations.Add(4, 7, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(9, 14, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(15, 18, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);
    text.Annotations.Add(19, 24, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("verb").Value);
    text.Annotations.Add(25, 29, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adv").Value);
    text.Annotations.Add(0, 18, FeatureStruct.New().Symbol(NP).Value);
    text.Annotations.Add(19, 29, FeatureStruct.New().Symbol(VP).Value);

    // ---- Pattern 2: segment + Range(1, 3) segments ----
    pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value).Range(1, 3).Value;

    matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 3), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 13), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(6, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[5].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(4, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 13), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(19, 23), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[3].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(33, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(0, 2), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(4, 7), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(4, 6), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(12, 14), hits[14].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[32].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(27, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 13), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(9, 12), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(9, 11), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(16, 18), hits[11].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[26].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // ---- Pattern 3: segment + LazyRange(1, 3) segments ----
    pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Value).LazyRange(1, 3).Value;

    matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 2), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 11), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(9, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 2), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 17), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[8].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(7, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 11), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(19, 21), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[6].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(33, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 2), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(4, 6), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(4, 7), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(12, 14), hits[14].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[32].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(27, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 11), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(9, 12), hits[1].Span);
    Assert.AreEqual(SpanFactory.Create(9, 13), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(16, 18), hits[11].Span);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[26].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Matches two adjacent one-or-more groups ("first" and "second") against the four-annotation
/// word "test" with the matcher anchored at both ends and AllSubmatches enabled, and checks
/// that AllMatches yields the three ways of splitting the word between the groups, in the
/// order first = (0,3), (0,2), (0,1).
/// </summary>
public void NondeterministicPattern()
{
    var anything = FeatureStruct.New().Value;

    var pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", first => first.Annotation(anything).OneOrMore)
        .Group("second", second => second.Annotation(anything).OneOrMore).Value;

    var settings = new MatcherSettings<int>
    {
        AnchoredToStart = true,
        AnchoredToEnd = true,
        AllSubmatches = true
    };
    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern, settings);

    var text = new AnnotatedStringData(SpanFactory, "test");
    text.Annotations.Add(0, 1, FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo("t").Value);
    text.Annotations.Add(1, 2, FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo("e").Value);
    text.Annotations.Add(2, 3, FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo("s").Value);
    text.Annotations.Add(3, 4, FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo("t").Value);

    Match<AnnotatedStringData, int>[] hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(3, hits.Length);

    // Offset at which "first" ends and "second" begins, for each expected match in order.
    int[] boundaries = { 3, 2, 1 };
    for (int i = 0; i < boundaries.Length; i++)
    {
        int boundary = boundaries[i];
        Assert.AreEqual(SpanFactory.Create(0, 4), hits[i].Span);
        Assert.AreEqual(SpanFactory.Create(0, boundary), hits[i].GroupCaptures["first"].Span);
        Assert.AreEqual(SpanFactory.Create(boundary, 4), hits[i].GroupCaptures["second"].Span);
    }
}
/// <summary>
/// Checks IsMatch, Match, Matches and AllMatches for two alternation patterns over a sample
/// sentence: first "space boundary OR son+/syl- segment" (start offsets: none, 7, 29), then,
/// after word-level and NP/VP annotations have been added, a chain of det/adj/noun word
/// alternatives joined through a space boundary (start offsets: none, 7, 10).
/// </summary>
public void AlternationPattern()
{
    AnnotatedStringData text = CreateStringData("the old, angry man slept well.");

    // ---- Pattern 1: space boundary OR sonorant non-syllabic segment ----
    Pattern<AnnotatedStringData, int> pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(" ").Value)
        .Or
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("son+").Symbol(Seg).Symbol("syl-").Value).Value;

    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    Match<AnnotatedStringData, int> hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(1, 2), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(8, 9), hit.Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    Match<AnnotatedStringData, int>[] hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(16, hits.Length);
    Assert.AreEqual(SpanFactory.Create(1, 2), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(13, 14), hits[6].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[15].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(13, hits.Length);
    Assert.AreEqual(SpanFactory.Create(8, 9), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 16), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[12].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(16, hits.Length);
    Assert.AreEqual(SpanFactory.Create(1, 2), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(13, 14), hits[6].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[15].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(13, hits.Length);
    Assert.AreEqual(SpanFactory.Create(8, 9), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(15, 16), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(28, 29), hits[12].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // ---- Add word-level and phrase-level annotations on top of the segments ----
    text.Annotations.Add(0, 3, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value);
    text.Annotations.Add(4, 7, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(9, 14, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(15, 18, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);
    text.Annotations.Add(19, 24, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("verb").Value);
    text.Annotations.Add(25, 29, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adv").Value);
    text.Annotations.Add(0, 18, FeatureStruct.New().Symbol(NP).Value);
    text.Annotations.Add(19, 29, FeatureStruct.New().Symbol(VP).Value);

    // ---- Pattern 2: det/adj/noun alternatives around a space boundary ----
    pattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value)
        .Or
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value)
        .Or
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(" ").Value)
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value)
        .Or
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value)
        .Or
        .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value).Value;

    matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 10));

    hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 7), hit.Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 18), hit.Span);

    hit = matcher.Match(text, 10);
    Assert.IsFalse(hit.Success);

    hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 7), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(9, 18), hits[1].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 18), hits[0].Span);

    hits = matcher.Matches(text, 10).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 7), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(9, 18), hits[1].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 18), hits[0].Span);

    hits = matcher.AllMatches(text, 10).ToArray();
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Checks group captures reported by Match, Matches and AllMatches for two patterns over a
/// sample sentence: first an "onset" group, a "syl+" segment and a "coda" group (start
/// offsets: none, 7, 29), then, after word-level annotations have been added, an "NP" group
/// and a "VP" group with nested "headNoun"/"headVerb" groups (start offsets: none, 7, 16).
/// </summary>
public void CapturingGroupPattern()
{
    AnnotatedStringData text = CreateStringData("the old, angry man slept well.");

    // ---- Pattern 1: (onset: syl-*) syl+ (coda: syl-*) ----
    Pattern<AnnotatedStringData, int> pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("onset", onset => onset
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).ZeroOrMore)
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value)
        .Group("coda", coda => coda
            .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl-").Value).ZeroOrMore).Value;

    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 29));

    Match<AnnotatedStringData, int> hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 3), hit.Span);
    Assert.IsTrue(hit.GroupCaptures["onset"].Success);
    Assert.AreEqual(SpanFactory.Create(0, 2), hit.GroupCaptures["onset"].Span);
    Assert.IsFalse(hit.GroupCaptures["coda"].Success);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 14), hit.Span);
    Assert.IsFalse(hit.GroupCaptures["onset"].Success);
    Assert.IsTrue(hit.GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(10, 14), hit.GroupCaptures["coda"].Span);

    hit = matcher.Match(text, 29);
    Assert.IsFalse(hit.Success);

    Match<AnnotatedStringData, int>[] hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(6, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.AreEqual(SpanFactory.Create(4, 7), hits[1].Span);
    Assert.IsFalse(hits[1].GroupCaptures["onset"].Success);
    Assert.IsTrue(hits[1].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(5, 7), hits[1].GroupCaptures["coda"].Span);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[5].Span);
    Assert.IsTrue(hits[5].GroupCaptures["onset"].Success);
    Assert.AreEqual(SpanFactory.Create(25, 26), hits[5].GroupCaptures["onset"].Span);
    Assert.IsTrue(hits[5].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(27, 29), hits[5].GroupCaptures["coda"].Span);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(4, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 14), hits[0].Span);
    Assert.IsFalse(hits[0].GroupCaptures["onset"].Success);
    Assert.IsTrue(hits[0].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(10, 14), hits[0].GroupCaptures["coda"].Span);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[1].Span);
    Assert.IsTrue(hits[1].GroupCaptures["onset"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 16), hits[1].GroupCaptures["onset"].Span);
    Assert.IsTrue(hits[1].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(17, 18), hits[1].GroupCaptures["coda"].Span);
    Assert.AreEqual(SpanFactory.Create(25, 29), hits[3].Span);

    hits = matcher.Matches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(30, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 3), hits[0].Span);
    Assert.IsTrue(hits[0].GroupCaptures["onset"].Success);
    Assert.AreEqual(SpanFactory.Create(0, 2), hits[0].GroupCaptures["onset"].Span);
    Assert.IsFalse(hits[0].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(1, 3), hits[1].Span);
    Assert.IsTrue(hits[1].GroupCaptures["onset"].Success);
    Assert.AreEqual(SpanFactory.Create(1, 2), hits[1].GroupCaptures["onset"].Span);
    Assert.IsFalse(hits[1].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(2, 3), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(4, 7), hits[3].Span);
    Assert.AreEqual(SpanFactory.Create(4, 6), hits[4].Span);
    Assert.AreEqual(SpanFactory.Create(4, 5), hits[5].Span);
    Assert.AreEqual(SpanFactory.Create(16, 17), hits[14].Span);
    Assert.AreEqual(SpanFactory.Create(26, 27), hits[29].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(24, hits.Length);
    Assert.AreEqual(SpanFactory.Create(9, 14), hits[0].Span);
    Assert.IsFalse(hits[0].GroupCaptures["onset"].Success);
    Assert.IsTrue(hits[0].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(10, 14), hits[0].GroupCaptures["coda"].Span);
    Assert.AreEqual(SpanFactory.Create(9, 13), hits[1].Span);
    Assert.IsFalse(hits[1].GroupCaptures["onset"].Success);
    Assert.IsTrue(hits[1].GroupCaptures["coda"].Success);
    Assert.AreEqual(SpanFactory.Create(10, 13), hits[1].GroupCaptures["coda"].Span);
    Assert.AreEqual(SpanFactory.Create(9, 12), hits[2].Span);
    Assert.AreEqual(SpanFactory.Create(19, 22), hits[11].Span);
    Assert.AreEqual(SpanFactory.Create(26, 27), hits[23].Span);

    hits = matcher.AllMatches(text, 29).ToArray();
    Assert.AreEqual(0, hits.Length);

    // ---- Add word-level annotations on top of the segments ----
    text.Annotations.Add(0, 3, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value);
    text.Annotations.Add(4, 7, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(9, 14, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value);
    text.Annotations.Add(15, 18, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);
    text.Annotations.Add(19, 24, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("verb").Value);
    text.Annotations.Add(25, 29, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adv").Value);

    // ---- Pattern 2: (NP: det? adj* headNoun) boundary+ (VP: headVerb adv*) ----
    pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("NP", np => np
            .Group(det => det
                .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("det").Value)
                .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Value).OneOrMore).Optional
            .Group(adj => adj
                .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adj").Value)
                .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Value).OneOrMore).ZeroOrMore
            .Group("headNoun", headNoun => headNoun.Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value)))
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Value).OneOrMore
        .Group("VP", vp => vp
            .Group("headVerb", headVerb => headVerb.Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("verb").Value))
            .Group(adv => adv
                .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Value).OneOrMore
                .Annotation(FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("adv").Value)).ZeroOrMore).Value;

    matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern);

    Assert.IsTrue(matcher.IsMatch(text));
    Assert.IsTrue(matcher.IsMatch(text, 7));
    Assert.IsFalse(matcher.IsMatch(text, 16));

    hit = matcher.Match(text);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(0, 29), hit.Span);
    Assert.IsTrue(hit.GroupCaptures["NP"].Success);
    Assert.AreEqual(SpanFactory.Create(0, 18), hit.GroupCaptures["NP"].Span);
    Assert.IsTrue(hit.GroupCaptures["headNoun"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 18), hit.GroupCaptures["headNoun"].Span);
    Assert.IsTrue(hit.GroupCaptures["VP"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 29), hit.GroupCaptures["VP"].Span);
    Assert.IsTrue(hit.GroupCaptures["headVerb"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hit.GroupCaptures["headVerb"].Span);

    hit = matcher.Match(text, 7);
    Assert.IsTrue(hit.Success);
    Assert.AreEqual(SpanFactory.Create(9, 29), hit.Span);
    Assert.IsTrue(hit.GroupCaptures["NP"].Success);
    Assert.AreEqual(SpanFactory.Create(9, 18), hit.GroupCaptures["NP"].Span);
    Assert.IsTrue(hit.GroupCaptures["headNoun"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 18), hit.GroupCaptures["headNoun"].Span);
    Assert.IsTrue(hit.GroupCaptures["VP"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 29), hit.GroupCaptures["VP"].Span);
    Assert.IsTrue(hit.GroupCaptures["headVerb"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hit.GroupCaptures["headVerb"].Span);

    hit = matcher.Match(text, 16);
    Assert.IsFalse(hit.Success);

    hits = matcher.Matches(text).ToArray();
    Assert.AreEqual(1, hits.Length);

    hits = matcher.Matches(text, 7).ToArray();
    Assert.AreEqual(1, hits.Length);

    hits = matcher.Matches(text, 16).ToArray();
    Assert.AreEqual(0, hits.Length);

    hits = matcher.AllMatches(text).ToArray();
    Assert.AreEqual(8, hits.Length);
    Assert.AreEqual(SpanFactory.Create(0, 24), hits[1].Span);
    Assert.IsTrue(hits[1].GroupCaptures["NP"].Success);
    Assert.AreEqual(SpanFactory.Create(0, 18), hits[1].GroupCaptures["NP"].Span);
    Assert.IsTrue(hits[1].GroupCaptures["headNoun"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[1].GroupCaptures["headNoun"].Span);
    Assert.IsTrue(hits[1].GroupCaptures["VP"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hits[1].GroupCaptures["VP"].Span);
    Assert.IsTrue(hits[1].GroupCaptures["headVerb"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hits[1].GroupCaptures["headVerb"].Span);
    Assert.AreEqual(SpanFactory.Create(15, 24), hits[7].Span);
    Assert.IsTrue(hits[7].GroupCaptures["NP"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[7].GroupCaptures["NP"].Span);
    Assert.IsTrue(hits[7].GroupCaptures["headNoun"].Success);
    Assert.AreEqual(SpanFactory.Create(15, 18), hits[7].GroupCaptures["headNoun"].Span);
    Assert.IsTrue(hits[7].GroupCaptures["VP"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hits[7].GroupCaptures["VP"].Span);
    Assert.IsTrue(hits[7].GroupCaptures["headVerb"].Success);
    Assert.AreEqual(SpanFactory.Create(19, 24), hits[7].GroupCaptures["headVerb"].Span);

    hits = matcher.AllMatches(text, 7).ToArray();
    Assert.AreEqual(4, hits.Length);

    hits = matcher.AllMatches(text, 16).ToArray();
    Assert.AreEqual(0, hits.Length);
}
/// <summary>
/// Checks the results of <c>AllMatches</c> for three patterns containing repeated or optional
/// groups. Every matcher is built with settings that anchor the match to both the start and the
/// end of the input and that enable <c>AllSubmatches</c>.
/// </summary>
public void NondeterministicPattern()
{
    // Scenario 1: two adjacent one-or-more groups over an unconstrained feature structure.
    var wildcard = FeatureStruct.New().Value;
    Pattern<AnnotatedStringData, int> splitPattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", head => head.Annotation(wildcard).OneOrMore)
        .Group("second", tail => tail.Annotation(wildcard).OneOrMore).Value;
    Matcher<AnnotatedStringData, int> splitMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        splitPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    // Annotate each character of "test" with its own single-offset "strRep" annotation.
    AnnotatedStringData input = new AnnotatedStringData(SpanFactory, "test");
    string[] segments = { "t", "e", "s", "t" };
    for (int i = 0; i < segments.Length; i++)
    {
        input.Annotations.Add(i, i + 1, FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo(segments[i]).Value);
    }

    Match<AnnotatedStringData, int>[] splits = splitMatcher.AllMatches(input).ToArray();
    Assert.AreEqual(3, splits.Length);

    // Each expected match covers the whole word; only the offset where "first" ends and
    // "second" begins differs, and the matches are expected in this order.
    int[] boundaries = { 3, 2, 1 };
    for (int i = 0; i < boundaries.Length; i++)
    {
        int boundary = boundaries[i];
        Assert.AreEqual(SpanFactory.Create(0, 4), splits[i].Span);
        Assert.AreEqual(SpanFactory.Create(0, boundary), splits[i].GroupCaptures["first"].Span);
        Assert.AreEqual(SpanFactory.Create(boundary, 4), splits[i].GroupCaptures["second"].Span);
    }

    // Scenario 2: a "syl+" segment followed by a zero-or-more group that wraps an optional
    // nested group.
    Pattern<AnnotatedStringData, int> nestedPattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", nucleus => nucleus.Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value))
        .Group("second", rest => rest
            .Group("third", inner => inner.Annotation(FeatureStruct.New().Symbol(Seg).Value).Optional).ZeroOrMore).Value;
    Matcher<AnnotatedStringData, int> nestedMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        nestedPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    Match<AnnotatedStringData, int>[] nestedLong = nestedMatcher.AllMatches(CreateStringData("etested")).ToArray();
    Assert.That(nestedLong.Length, Is.EqualTo(1));
    Assert.That(nestedLong[0].Success, Is.True);
    Assert.That(nestedLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
    Assert.That(nestedLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(nestedLong[0].GroupCaptures["second"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));
    Assert.That(nestedLong[0].GroupCaptures["third"].Span, Is.EqualTo(SpanFactory.Create(6, 7)));

    // With nothing after the first segment, "second" and "third" are expected to be unsuccessful.
    Match<AnnotatedStringData, int>[] nestedShort = nestedMatcher.AllMatches(CreateStringData("e")).ToArray();
    Assert.That(nestedShort.Length, Is.EqualTo(1));
    Assert.That(nestedShort[0].Success, Is.True);
    Assert.That(nestedShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(nestedShort[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(nestedShort[0].GroupCaptures["second"].Success, Is.False);
    Assert.That(nestedShort[0].GroupCaptures["third"].Success, Is.False);

    // Scenario 3: an ungrouped "syl+" segment followed by a group holding a zero-or-more segment.
    Pattern<AnnotatedStringData, int> trailingPattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value)
        .Group("first", trailing => trailing.Annotation(FeatureStruct.New().Symbol(Seg).Value).ZeroOrMore).Value;
    Matcher<AnnotatedStringData, int> trailingMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        trailingPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    Match<AnnotatedStringData, int>[] trailingLong = trailingMatcher.AllMatches(CreateStringData("etested")).ToArray();
    Assert.That(trailingLong.Length, Is.EqualTo(1));
    Assert.That(trailingLong[0].Success, Is.True);
    Assert.That(trailingLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
    Assert.That(trailingLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));

    // With nothing after the first segment, the "first" capture is expected to be unsuccessful.
    Match<AnnotatedStringData, int>[] trailingShort = trailingMatcher.AllMatches(CreateStringData("e")).ToArray();
    Assert.That(trailingShort.Length, Is.EqualTo(1));
    Assert.That(trailingShort[0].Success, Is.True);
    Assert.That(trailingShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(trailingShort[0].GroupCaptures["first"].Success, Is.False);
}