/// <summary>
/// Applies a single pattern rule to the word "fazk" and checks that at least one result is produced.
/// </summary>
/// <remarks>
/// The pattern consists of three named groups:
/// <c>leftEnv</c> (a <c>cons+</c> segment whose <c>voice</c> feature is bound to variable <c>a</c>),
/// <c>target</c> (a <c>cons-</c>, <c>low+</c> segment) and
/// <c>rightEnv</c> (a <c>cons+</c> segment whose <c>voice</c> feature is the negation of variable <c>a</c>).
/// The rule action priority-unions <c>low-</c> into every annotation covered by the <c>target</c> capture.
/// </remarks>
public void Apply()
{
    // Feature structures for the three positions of the pattern.
    var leftSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons+")
        .Feature("voice").EqualToVariable("a").Value;
    var targetSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons-")
        .Symbol("low+").Value;
    var rightSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons+")
        .Feature("voice").Not.EqualToVariable("a").Value;

    var pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("leftEnv", g => g.Annotation(leftSegment))
        .Group("target", g => g.Annotation(targetSegment))
        .Group("rightEnv", g => g.Annotation(rightSegment))
        .Value;

    var spec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
        pattern,
        (r, match) =>
        {
            // Only the annotations inside the "target" capture are modified.
            var targetSpan = match.GroupCaptures["target"].Span;
            foreach (Annotation<int> node in match.Input.Annotations.GetNodes(targetSpan))
            {
                var lowered = FeatureStruct.New(PhoneticFeatSys).Symbol("low-").Value;
                node.FeatureStruct.PriorityUnion(lowered);
            }

            return match.Input;
        });

    var rule = new PatternRule<AnnotatedStringData, int>(SpanFactory, spec);
    AnnotatedStringData inputWord = CreateStringData("fazk");

    var output = rule.Apply(inputWord);
    Assert.IsTrue(output.Any());
}
/// <summary>
/// Creates an <see cref="AnnotatedStringData"/> for <paramref name="str"/> with one annotation per character.
/// </summary>
/// <param name="str">The string to wrap; every character is looked up in <c>Characters</c>.</param>
/// <returns>
/// The new string data, where the character at index <c>i</c> is annotated over <c>[i, i + 1)</c>
/// with a deep clone of its entry in <c>Characters</c>.
/// </returns>
protected AnnotatedStringData CreateStringData(string str)
{
    var data = new AnnotatedStringData(SpanFactory, str);

    int offset = 0;
    foreach (char ch in str)
    {
        // Clone so that the shared per-character feature structure is never handed out directly.
        FeatureStruct template = Characters[ch];
        data.Annotations.Add(offset, offset + 1, template.DeepClone());
        offset++;
    }

    return data;
}
/// <summary>
/// Applies a batch of two rule specs sharing one pattern to the word "fazk" and checks that
/// at least one result is produced.
/// </summary>
/// <remarks>
/// The first spec sets <c>low-</c>/<c>mid-</c> on the target and carries an applicability predicate that
/// requires the word-level annotation to be unifiable with <c>verb</c>. The second spec sets
/// <c>low-</c>/<c>mid+</c> and has no predicate. The input word is annotated as <c>noun</c>.
/// NOTE(review): presumably this makes the second spec the one that applies — confirm against
/// <c>BatchPatternRuleSpec</c>.
/// </remarks>
public void Batch()
{
    // Feature structures for the three positions of the shared pattern.
    var leftSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons+")
        .Feature("voice").EqualToVariable("a").Value;
    var targetSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons-")
        .Symbol("low+").Value;
    var rightSegment = FeatureStruct.New(PhoneticFeatSys)
        .Symbol(Seg)
        .Symbol("cons+")
        .Feature("voice").Not.EqualToVariable("a").Value;

    var pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("leftEnv", g => g.Annotation(leftSegment))
        .Group("target", g => g.Annotation(targetSegment))
        .Group("rightEnv", g => g.Annotation(rightSegment))
        .Value;

    // Spec 1: guarded by a predicate on the word-level annotation.
    var guardedSpec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
        pattern,
        (r, match) =>
        {
            var targetSpan = match.GroupCaptures["target"].Span;
            foreach (Annotation<int> node in match.Input.Annotations.GetNodes(targetSpan))
            {
                var update = FeatureStruct.New(PhoneticFeatSys)
                    .Symbol("low-")
                    .Symbol("mid-").Value;
                node.FeatureStruct.PriorityUnion(update);
            }

            return match.Input;
        },
        input =>
        {
            // Exactly one annotation is expected to carry the Word type symbol.
            var wordAnnotation = input.Annotations.Single(
                ann => ((FeatureSymbol)ann.FeatureStruct.GetValue(Type)) == Word);
            var verb = FeatureStruct.New(WordFeatSys).Symbol("verb").Value;
            return wordAnnotation.FeatureStruct.IsUnifiable(verb);
        });

    // Spec 2: same pattern, no applicability predicate.
    var unguardedSpec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
        pattern,
        (r, match) =>
        {
            var targetSpan = match.GroupCaptures["target"].Span;
            foreach (Annotation<int> node in match.Input.Annotations.GetNodes(targetSpan))
            {
                var update = FeatureStruct.New(PhoneticFeatSys)
                    .Symbol("low-")
                    .Symbol("mid+").Value;
                node.FeatureStruct.PriorityUnion(update);
            }

            return match.Input;
        });

    var batchSpec = new BatchPatternRuleSpec<AnnotatedStringData, int>(new[] { guardedSpec, unguardedSpec });
    var rule = new PatternRule<AnnotatedStringData, int>(SpanFactory, batchSpec);

    AnnotatedStringData inputWord = CreateStringData("fazk");
    var nounWord = FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value;
    inputWord.Annotations.Add(inputWord.Span, nounWord);

    var output = rule.Apply(inputWord);
    Assert.IsTrue(output.Any());
}
/// <summary>
/// Checks that a matcher anchored at both ends with <c>AllSubmatches</c> enabled returns every way of
/// dividing a four-annotation word between two consecutive one-or-more groups.
/// </summary>
/// <remarks>
/// Three matches are expected, each spanning <c>[0, 4)</c>, with the boundary between the
/// <c>first</c> and <c>second</c> groups at offsets 3, 2 and 1, in that order.
/// </remarks>
public void NondeterministicPattern()
{
    var any = FeatureStruct.New().Value;
    var pattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", g => g.Annotation(any).OneOrMore)
        .Group("second", g => g.Annotation(any).OneOrMore)
        .Value;

    var settings = new MatcherSettings<int>
    {
        AnchoredToStart = true,
        AnchoredToEnd = true,
        AllSubmatches = true
    };
    var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern, settings);

    // One annotation per character of "test", each carrying its string representation.
    var word = new AnnotatedStringData(SpanFactory, "test");
    string[] segments = { "t", "e", "s", "t" };
    for (int pos = 0; pos < segments.Length; pos++)
    {
        var segment = FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo(segments[pos]).Value;
        word.Annotations.Add(pos, pos + 1, segment);
    }

    Match<AnnotatedStringData, int>[] matches = matcher.AllMatches(word).ToArray();

    // Offset at which "first" ends and "second" begins, per expected match.
    int[] boundaries = { 3, 2, 1 };
    Assert.AreEqual(boundaries.Length, matches.Length);
    for (int i = 0; i < boundaries.Length; i++)
    {
        int boundary = boundaries[i];
        Assert.AreEqual(SpanFactory.Create(0, 4), matches[i].Span);
        Assert.AreEqual(SpanFactory.Create(0, boundary), matches[i].GroupCaptures["first"].Span);
        Assert.AreEqual(SpanFactory.Create(boundary, 4), matches[i].GroupCaptures["second"].Span);
    }
}
/// <summary>
/// Wraps <paramref name="str"/> in an <see cref="AnnotatedStringData"/> and annotates each character.
/// </summary>
/// <param name="str">The source string; each of its characters must have an entry in <c>Characters</c>.</param>
/// <returns>
/// String data containing, for every index <c>i</c>, an annotation over <c>[i, i + 1)</c> holding a
/// deep clone of the feature structure registered for the character at that index.
/// </returns>
protected AnnotatedStringData CreateStringData(string str)
{
    var annotated = new AnnotatedStringData(SpanFactory, str);

    int length = str.Length;
    for (int start = 0, end = 1; start < length; start++, end++)
    {
        // Each annotation gets its own copy of the character's feature structure.
        FeatureStruct source = Characters[str[start]];
        annotated.Annotations.Add(start, end, source.DeepClone());
    }

    return annotated;
}
/// <summary>
/// Exercises matchers anchored at both ends with <c>AllSubmatches</c> enabled on three patterns.
/// </summary>
/// <remarks>
/// Scenario 1: two consecutive one-or-more groups over "test" — three matches are expected, with the
/// group boundary at offsets 3, 2 and 1, in that order.
/// Scenario 2: a <c>syl+</c> segment group followed by a zero-or-more group wrapping an optional nested
/// group — a single match is expected for both "etested" and "e".
/// Scenario 3: an ungrouped <c>syl+</c> segment followed by a zero-or-more group — a single match is
/// expected for both "etested" and "e".
/// </remarks>
public void NondeterministicPattern()
{
    // ---- Scenario 1: two adjacent one-or-more groups ----
    var any = FeatureStruct.New().Value;
    var splitPattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", g => g.Annotation(any).OneOrMore)
        .Group("second", g => g.Annotation(any).OneOrMore)
        .Value;
    var splitMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        splitPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    // One annotation per character of "test", each carrying its string representation.
    var testWord = new AnnotatedStringData(SpanFactory, "test");
    string[] segments = { "t", "e", "s", "t" };
    for (int pos = 0; pos < segments.Length; pos++)
    {
        var segment = FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo(segments[pos]).Value;
        testWord.Annotations.Add(pos, pos + 1, segment);
    }

    Match<AnnotatedStringData, int>[] splitMatches = splitMatcher.AllMatches(testWord).ToArray();

    // Offset at which "first" ends and "second" begins, per expected match.
    int[] boundaries = { 3, 2, 1 };
    Assert.AreEqual(boundaries.Length, splitMatches.Length);
    for (int i = 0; i < boundaries.Length; i++)
    {
        int boundary = boundaries[i];
        Assert.AreEqual(SpanFactory.Create(0, 4), splitMatches[i].Span);
        Assert.AreEqual(SpanFactory.Create(0, boundary), splitMatches[i].GroupCaptures["first"].Span);
        Assert.AreEqual(SpanFactory.Create(boundary, 4), splitMatches[i].GroupCaptures["second"].Span);
    }

    // ---- Scenario 2: optional group nested inside a zero-or-more group ----
    var nestedPattern = Pattern<AnnotatedStringData, int>.New()
        .Group("first", g1 => g1.Annotation(
            FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value))
        .Group("second", g2 => g2
            .Group("third", g3 => g3.Annotation(FeatureStruct.New().Symbol(Seg).Value).Optional)
            .ZeroOrMore)
        .Value;
    var nestedMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        nestedPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    AnnotatedStringData longWord = CreateStringData("etested");
    Match<AnnotatedStringData, int>[] nestedLong = nestedMatcher.AllMatches(longWord).ToArray();
    Assert.That(nestedLong.Length, Is.EqualTo(1));
    Assert.That(nestedLong[0].Success, Is.True);
    Assert.That(nestedLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
    Assert.That(nestedLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(nestedLong[0].GroupCaptures["second"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));
    Assert.That(nestedLong[0].GroupCaptures["third"].Span, Is.EqualTo(SpanFactory.Create(6, 7)));

    AnnotatedStringData shortWord = CreateStringData("e");
    Match<AnnotatedStringData, int>[] nestedShort = nestedMatcher.AllMatches(shortWord).ToArray();
    Assert.That(nestedShort.Length, Is.EqualTo(1));
    Assert.That(nestedShort[0].Success, Is.True);
    Assert.That(nestedShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(nestedShort[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    // With nothing after the first segment, neither the outer nor the nested group captures.
    Assert.That(nestedShort[0].GroupCaptures["second"].Success, Is.False);
    Assert.That(nestedShort[0].GroupCaptures["third"].Success, Is.False);

    // ---- Scenario 3: ungrouped leading segment followed by a zero-or-more group ----
    var trailingPattern = Pattern<AnnotatedStringData, int>.New()
        .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value)
        .Group("first", g => g.Annotation(FeatureStruct.New().Symbol(Seg).Value).ZeroOrMore)
        .Value;
    var trailingMatcher = new Matcher<AnnotatedStringData, int>(
        SpanFactory,
        trailingPattern,
        new MatcherSettings<int> { AnchoredToStart = true, AnchoredToEnd = true, AllSubmatches = true });

    longWord = CreateStringData("etested");
    Match<AnnotatedStringData, int>[] trailingLong = trailingMatcher.AllMatches(longWord).ToArray();
    Assert.That(trailingLong.Length, Is.EqualTo(1));
    Assert.That(trailingLong[0].Success, Is.True);
    Assert.That(trailingLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
    Assert.That(trailingLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));

    shortWord = CreateStringData("e");
    Match<AnnotatedStringData, int>[] trailingShort = trailingMatcher.AllMatches(shortWord).ToArray();
    Assert.That(trailingShort.Length, Is.EqualTo(1));
    Assert.That(trailingShort[0].Success, Is.True);
    Assert.That(trailingShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
    Assert.That(trailingShort[0].GroupCaptures["first"].Success, Is.False);
}
/// <summary>
/// Builds two finite-state transducers, determinizes each, and checks the strings they produce.
/// </summary>
/// <remarks>
/// First transducer: arcs over the <c>nas</c>/<c>cor</c> symbols; after determinization it is expected to
/// map "caNp" to "camp", "caN" to "can" and "carp" to "carp".
/// Second transducer: contains an arc with a null output and arcs with null inputs that emit boundary
/// annotations ("." and "+"); after determinization it is expected to map "camp" to "cap" and to
/// produce exactly the two outputs "cas+.p" and "cas.p" for "casp".
/// </remarks>
public void Transduce()
{
    // ---- First transducer ----
    var nasalFst = new Fst<AnnotatedStringData, int>(_operations) { UseUnification = false };
    nasalFst.StartState = nasalFst.CreateAcceptingState();

    // Self-loops on the start state.
    nasalFst.StartState.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Value,
        nasalFst.StartState);
    nasalFst.StartState.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+", "cor-").Value,
        nasalFst.StartState);

    // Input "cor? nas+" rewritten to "cor-", leading to a non-accepting state.
    State<AnnotatedStringData, int> nonCoronal = nasalFst.StartState.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value,
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value,
        nasalFst.CreateState());
    nonCoronal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value,
        nasalFst.StartState);

    // Input "cor? nas+" rewritten to "cor+", leading to an accepting state.
    State<AnnotatedStringData, int> coronal = nasalFst.StartState.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value,
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value,
        nasalFst.CreateAcceptingState());
    coronal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value,
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value,
        coronal);
    coronal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Symbol("cor+", "cor?").Value,
        nasalFst.StartState);
    coronal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+").Value,
        nasalFst.StartState);
    coronal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value,
        FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value,
        nonCoronal);

    Fst<AnnotatedStringData, int> nasalDfst = nasalFst.Determinize();

    // Input words and the single output string expected for each, checked in this order.
    string[] inputs = { "caNp", "caN", "carp" };
    string[] expectedOutputs = { "camp", "can", "carp" };

    AnnotatedStringData data;
    FstResult<AnnotatedStringData, int> result;
    for (int i = 0; i < inputs.Length; i++)
    {
        data = CreateStringData(inputs[i]);
        Assert.That(nasalDfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
        Assert.That(result.Output.String, Is.EqualTo(expectedOutputs[i]));
    }

    // ---- Second transducer ----
    var boundaryFst = new Fst<AnnotatedStringData, int>(_operations) { UseUnification = false };
    boundaryFst.StartState = boundaryFst.CreateAcceptingState();

    State<AnnotatedStringData, int> afterOnset = boundaryFst.StartState.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cons+").Value,
        boundaryFst.CreateState());
    State<AnnotatedStringData, int> afterNucleus = afterOnset.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cons-").Value,
        boundaryFst.CreateState());

    // A "nas+" input with a null output.
    State<AnnotatedStringData, int> beforeFinal = afterNucleus.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Value,
        null,
        boundaryFst.CreateState());

    State<AnnotatedStringData, int> afterVoiceless = afterNucleus.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("voice-").Value,
        boundaryFst.CreateState());

    // Null-input arcs that emit boundary annotations: either "." alone or "+" followed by ".".
    afterVoiceless.Arcs.Add(
        null,
        FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(".").Value,
        beforeFinal);
    State<AnnotatedStringData, int> afterPlus = afterVoiceless.Arcs.Add(
        null,
        FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo("+").Value,
        boundaryFst.CreateState());
    afterPlus.Arcs.Add(
        null,
        FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(".").Value,
        beforeFinal);

    beforeFinal.Arcs.Add(
        FeatureStruct.New(PhoneticFeatSys).Symbol("cons+").Value,
        boundaryFst.CreateAcceptingState());

    Fst<AnnotatedStringData, int> boundaryDfst = boundaryFst.Determinize();

    data = CreateStringData("camp");
    Assert.That(boundaryDfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
    Assert.That(result.Output.String, Is.EqualTo("cap"));

    // The multi-result overload is selected by the declared type of the out argument.
    data = CreateStringData("casp");
    IEnumerable<FstResult<AnnotatedStringData, int>> results;
    Assert.That(boundaryDfst.Transduce(data, data.Annotations.First, null, true, true, true, out results), Is.True);

    FstResult<AnnotatedStringData, int>[] allResults = results.ToArray();
    Assert.That(allResults.Length, Is.EqualTo(2));
    Assert.That(allResults.Select(r => r.Output.String), Is.EquivalentTo(new [] { "cas+.p", "cas.p" }));
}