Exemplo n.º 1
0
        /// <summary>
        /// Builds a single pattern rule, runs it over the word "fazk" and asserts that
        /// applying the rule yields at least one result.
        /// </summary>
        public void Apply()
        {
            // Three consecutive groups: leftEnv, target, rightEnv.
            // leftEnv ties its "voice" value to variable "a"; rightEnv requires "voice" NOT equal to "a".
            var pattern = Pattern<AnnotatedStringData, int>.New()
                .Group("leftEnv", lhs => lhs.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").EqualToVariable("a").Value))
                .Group("target", mid => mid.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons-")
                        .Symbol("low+").Value))
                .Group("rightEnv", rhs => rhs.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").Not.EqualToVariable("a").Value))
                .Value;

            var ruleSpec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(pattern, (r, match) =>
            {
                // Priority-union "low-" into every annotation covered by the "target" capture.
                GroupCapture<int> captured = match.GroupCaptures["target"];
                foreach (Annotation<int> node in match.Input.Annotations.GetNodes(captured.Span))
                {
                    node.FeatureStruct.PriorityUnion(
                        FeatureStruct.New(PhoneticFeatSys).Symbol("low-").Value);
                }

                return match.Input;
            });

            var rule = new PatternRule<AnnotatedStringData, int>(SpanFactory, ruleSpec);
            AnnotatedStringData inputWord = CreateStringData("fazk");

            // The test only checks that the rule produced some output for this word.
            bool producedOutput = rule.Apply(inputWord).Any();
            Assert.IsTrue(producedOutput);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates an <c>AnnotatedStringData</c> for <paramref name="str"/> and adds one
        /// annotation per character, spanning that character's offset to the next offset.
        /// </summary>
        /// <param name="str">The string to wrap; each character is looked up in <c>Characters</c>.</param>
        /// <returns>The annotated string data.</returns>
        protected AnnotatedStringData CreateStringData(string str)
        {
            var annotated = new AnnotatedStringData(SpanFactory, str);

            int offset = 0;
            foreach (char ch in str)
            {
                // Clone the looked-up feature structure so the annotation gets its own copy.
                FeatureStruct template = Characters[ch];
                annotated.Annotations.Add(offset, offset + 1, template.DeepClone());
                offset++;
            }

            return annotated;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Combines two rule specs that share one pattern into a batch rule, applies it to
        /// the word "fazk" (annotated as a noun) and asserts that at least one result is produced.
        /// </summary>
        public void Batch()
        {
            // Three consecutive groups: leftEnv, target, rightEnv.
            // leftEnv ties its "voice" value to variable "a"; rightEnv requires "voice" NOT equal to "a".
            var pattern = Pattern<AnnotatedStringData, int>.New()
                .Group("leftEnv", lhs => lhs.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").EqualToVariable("a").Value))
                .Group("target", mid => mid.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons-")
                        .Symbol("low+").Value))
                .Group("rightEnv", rhs => rhs.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").Not.EqualToVariable("a").Value))
                .Value;

            // First spec: sets "low-" and "mid-" on the target. Its extra predicate checks that
            // the single Word-typed annotation of the input is unifiable with "verb".
            var ruleSpec1 = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
                pattern,
                (r, match) =>
                {
                    GroupCapture<int> captured = match.GroupCaptures["target"];
                    foreach (Annotation<int> node in match.Input.Annotations.GetNodes(captured.Span))
                    {
                        node.FeatureStruct.PriorityUnion(
                            FeatureStruct.New(PhoneticFeatSys).Symbol("low-").Symbol("mid-").Value);
                    }

                    return match.Input;
                },
                word => word.Annotations
                    .Single(a => ((FeatureSymbol)a.FeatureStruct.GetValue(Type)) == Word)
                    .FeatureStruct
                    .IsUnifiable(FeatureStruct.New(WordFeatSys).Symbol("verb").Value));

            // Second spec: sets "low-" and "mid+" on the target, with no extra predicate.
            var ruleSpec2 = new DefaultPatternRuleSpec<AnnotatedStringData, int>(pattern, (r, match) =>
            {
                GroupCapture<int> captured = match.GroupCaptures["target"];
                foreach (Annotation<int> node in match.Input.Annotations.GetNodes(captured.Span))
                {
                    node.FeatureStruct.PriorityUnion(
                        FeatureStruct.New(PhoneticFeatSys).Symbol("low-").Symbol("mid+").Value);
                }

                return match.Input;
            });

            var specs = new[] { ruleSpec1, ruleSpec2 };
            var batchSpec = new BatchPatternRuleSpec<AnnotatedStringData, int>(specs);
            var rule = new PatternRule<AnnotatedStringData, int>(SpanFactory, batchSpec);

            // The whole word is annotated as a noun before the rule is applied.
            AnnotatedStringData inputWord = CreateStringData("fazk");
            inputWord.Annotations.Add(
                inputWord.Span,
                FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);

            bool producedOutput = rule.Apply(inputWord).Any();
            Assert.IsTrue(producedOutput);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Matches a pattern of two adjacent one-or-more groups against the four-segment
        /// word "test" with <c>AllSubmatches</c> enabled, and checks that all three ways of
        /// splitting the word between the groups are returned in the expected order.
        /// </summary>
        public void NondeterministicPattern()
        {
            // An empty feature structure is used for both groups' annotations.
            var wildcard = FeatureStruct.New().Value;

            var pattern = Pattern<AnnotatedStringData, int>.New()
                .Group("first", g => g.Annotation(wildcard).OneOrMore)
                .Group("second", g => g.Annotation(wildcard).OneOrMore)
                .Value;

            var settings = new MatcherSettings<int>
            {
                AnchoredToStart = true,
                AnchoredToEnd = true,
                AllSubmatches = true
            };
            var matcher = new Matcher<AnnotatedStringData, int>(SpanFactory, pattern, settings);

            // One annotation per segment of "test", each carrying its string representation.
            var word = new AnnotatedStringData(SpanFactory, "test");
            string[] segments = { "t", "e", "s", "t" };
            for (int pos = 0; pos < segments.Length; pos++)
            {
                word.Annotations.Add(pos, pos + 1,
                    FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo(segments[pos]).Value);
            }

            Match<AnnotatedStringData, int>[] matches = matcher.AllMatches(word).ToArray();
            Assert.AreEqual(3, matches.Length);

            // Offset at which "first" ends and "second" begins, for matches 0, 1 and 2.
            int[] splitPoints = { 3, 2, 1 };
            for (int i = 0; i < splitPoints.Length; i++)
            {
                int split = splitPoints[i];
                Match<AnnotatedStringData, int> current = matches[i];

                Assert.AreEqual(SpanFactory.Create(0, 4), current.Span);
                Assert.AreEqual(SpanFactory.Create(0, split), current.GroupCaptures["first"].Span);
                Assert.AreEqual(SpanFactory.Create(split, 4), current.GroupCaptures["second"].Span);
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Wraps <paramref name="str"/> in an <c>AnnotatedStringData</c> and annotates every
 /// character position with a deep clone of that character's entry in <c>Characters</c>.
 /// </summary>
 /// <param name="str">The string to annotate.</param>
 /// <returns>The annotated string data.</returns>
 protected AnnotatedStringData CreateStringData(string str)
 {
     var result = new AnnotatedStringData(SpanFactory, str);

     int start = 0;
     while (start < str.Length)
     {
         int end = start + 1;

         // Each annotation covers exactly one character: [start, end).
         FeatureStruct charFeatures = Characters[str[start]];
         result.Annotations.Add(start, end, charFeatures.DeepClone());

         start = end;
     }

     return result;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Checks the matches and group captures returned by <c>AllMatches</c> for three
        /// patterns, each matched with a matcher anchored to both ends and with
        /// <c>AllSubmatches</c> enabled.
        /// </summary>
        public void NondeterministicPattern()
        {
            // --- Scenario 1: two adjacent one-or-more groups over the word "test". ---
            var wildcard = FeatureStruct.New().Value;

            var splitPattern = Pattern<AnnotatedStringData, int>.New()
                .Group("first", g => g.Annotation(wildcard).OneOrMore)
                .Group("second", g => g.Annotation(wildcard).OneOrMore)
                .Value;

            var splitSettings = new MatcherSettings<int>
            {
                AnchoredToStart = true,
                AnchoredToEnd = true,
                AllSubmatches = true
            };
            var splitMatcher = new Matcher<AnnotatedStringData, int>(SpanFactory, splitPattern, splitSettings);

            // One annotation per segment of "test", each carrying its string representation.
            var testWord = new AnnotatedStringData(SpanFactory, "test");
            string[] segments = { "t", "e", "s", "t" };
            for (int pos = 0; pos < segments.Length; pos++)
            {
                testWord.Annotations.Add(pos, pos + 1,
                    FeatureStruct.New(PhoneticFeatSys).Feature("strRep").EqualTo(segments[pos]).Value);
            }

            Match<AnnotatedStringData, int>[] splitMatches = splitMatcher.AllMatches(testWord).ToArray();
            Assert.AreEqual(3, splitMatches.Length);

            // Offset at which "first" ends and "second" begins, for matches 0, 1 and 2.
            int[] splitPoints = { 3, 2, 1 };
            for (int i = 0; i < splitPoints.Length; i++)
            {
                int split = splitPoints[i];
                Match<AnnotatedStringData, int> current = splitMatches[i];

                Assert.AreEqual(SpanFactory.Create(0, 4), current.Span);
                Assert.AreEqual(SpanFactory.Create(0, split), current.GroupCaptures["first"].Span);
                Assert.AreEqual(SpanFactory.Create(split, 4), current.GroupCaptures["second"].Span);
            }

            // --- Scenario 2: a "syl+" segment, then a zero-or-more group wrapping an optional group. ---
            var nestedPattern = Pattern<AnnotatedStringData, int>.New()
                .Group("first", g1 => g1.Annotation(
                    FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value))
                .Group("second", g2 => g2
                    .Group("third", g3 => g3.Annotation(FeatureStruct.New().Symbol(Seg).Value).Optional)
                    .ZeroOrMore)
                .Value;

            var nestedSettings = new MatcherSettings<int>
            {
                AnchoredToStart = true,
                AnchoredToEnd = true,
                AllSubmatches = true
            };
            var nestedMatcher = new Matcher<AnnotatedStringData, int>(SpanFactory, nestedPattern, nestedSettings);

            AnnotatedStringData longWord = CreateStringData("etested");
            Match<AnnotatedStringData, int>[] nestedLong = nestedMatcher.AllMatches(longWord).ToArray();
            Assert.That(nestedLong.Length, Is.EqualTo(1));
            Assert.That(nestedLong[0].Success, Is.True);
            Assert.That(nestedLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
            Assert.That(nestedLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
            Assert.That(nestedLong[0].GroupCaptures["second"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));
            Assert.That(nestedLong[0].GroupCaptures["third"].Span, Is.EqualTo(SpanFactory.Create(6, 7)));

            // With a single-segment word, neither "second" nor "third" captures anything.
            AnnotatedStringData shortWord = CreateStringData("e");
            Match<AnnotatedStringData, int>[] nestedShort = nestedMatcher.AllMatches(shortWord).ToArray();
            Assert.That(nestedShort.Length, Is.EqualTo(1));
            Assert.That(nestedShort[0].Success, Is.True);
            Assert.That(nestedShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
            Assert.That(nestedShort[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
            Assert.That(nestedShort[0].GroupCaptures["second"].Success, Is.False);
            Assert.That(nestedShort[0].GroupCaptures["third"].Success, Is.False);

            // --- Scenario 3: an ungrouped "syl+" segment followed by one zero-or-more group. ---
            var trailingPattern = Pattern<AnnotatedStringData, int>.New()
                .Annotation(FeatureStruct.New(PhoneticFeatSys).Symbol(Seg).Symbol("syl+").Value)
                .Group("first", g => g.Annotation(FeatureStruct.New().Symbol(Seg).Value).ZeroOrMore)
                .Value;

            var trailingSettings = new MatcherSettings<int>
            {
                AnchoredToStart = true,
                AnchoredToEnd = true,
                AllSubmatches = true
            };
            var trailingMatcher = new Matcher<AnnotatedStringData, int>(SpanFactory, trailingPattern, trailingSettings);

            longWord = CreateStringData("etested");
            Match<AnnotatedStringData, int>[] trailingLong = trailingMatcher.AllMatches(longWord).ToArray();
            Assert.That(trailingLong.Length, Is.EqualTo(1));
            Assert.That(trailingLong[0].Success, Is.True);
            Assert.That(trailingLong[0].Span, Is.EqualTo(SpanFactory.Create(0, 7)));
            Assert.That(trailingLong[0].GroupCaptures["first"].Span, Is.EqualTo(SpanFactory.Create(1, 7)));

            // With a single-segment word the group captures nothing.
            shortWord = CreateStringData("e");
            Match<AnnotatedStringData, int>[] trailingShort = trailingMatcher.AllMatches(shortWord).ToArray();
            Assert.That(trailingShort.Length, Is.EqualTo(1));
            Assert.That(trailingShort[0].Success, Is.True);
            Assert.That(trailingShort[0].Span, Is.EqualTo(SpanFactory.Create(0, 1)));
            Assert.That(trailingShort[0].GroupCaptures["first"].Success, Is.False);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds two finite-state transducers over <c>PhoneticFeatSys</c> feature structures,
        /// determinizes each one, and checks the output strings that <c>Transduce</c> produces
        /// for several input words.
        /// </summary>
        public void Transduce()
        {
            // ---- First transducer: built with unification turned off. ----
            var fst = new Fst <AnnotatedStringData, int>(_operations)
            {
                UseUnification = false
            };

            // The start state is accepting and has two self-loops: one for "nas-"/"nas?" input and
            // one for "nas+" input that is also "cor+" or "cor-".
            fst.StartState = fst.CreateAcceptingState();
            fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Value, fst.StartState);
            fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+", "cor-").Value, fst.StartState);
            // Input "cor?" + "nas+" with output "cor-" leads to s1, a new non-accepting state.
            State <AnnotatedStringData, int> s1 = fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, fst.CreateState());

            // s1's only arc goes back to the start state on "cor-" input.
            s1.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, fst.StartState);
            // The same "cor?" + "nas+" input with output "cor+" leads to s2, a new accepting state.
            State <AnnotatedStringData, int> s2 = fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value, fst.CreateAcceptingState());

            // Arcs out of s2: a self-loop and an arc to s1 for further "cor?" + "nas+" input (outputs
            // "cor+" and "cor-" respectively), plus two arcs back to the start state.
            s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value, s2);
            s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Symbol("cor+", "cor?").Value, fst.StartState);
            s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+").Value, fst.StartState);
            s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, s1);

            // All Transduce calls below run against the determinized machine, not the original.
            Fst <AnnotatedStringData, int> dfst = fst.Determinize();

            // NOTE(review): "N" is presumably defined in Characters as a "nas+", "cor?" segment -- confirm
            // against the Characters table, which is not visible here.
            AnnotatedStringData data = CreateStringData("caNp");
            FstResult <AnnotatedStringData, int> result;

            // "caNp" is expected to come out as "camp".
            Assert.That(dfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
            Assert.That(result.Output.String, Is.EqualTo("camp"));

            // "caN" is expected to come out as "can".
            data = CreateStringData("caN");
            Assert.That(dfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
            Assert.That(result.Output.String, Is.EqualTo("can"));

            // "carp" is expected to come out unchanged.
            data = CreateStringData("carp");
            Assert.That(dfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
            Assert.That(result.Output.String, Is.EqualTo("carp"));

            // ---- Second transducer: arcs with a null output and arcs with a null input. ----
            fst = new Fst <AnnotatedStringData, int>(_operations)
            {
                UseUnification = false
            };
            fst.StartState = fst.CreateAcceptingState();
            // s1 is reached from the start state by a "cons+" arc followed by a "cons-" arc.
            s1             = fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cons+").Value, fst.CreateState())
                             .Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cons-").Value, fst.CreateState());
            // From s1, "nas+" input with a null output leads to s2.
            s2 = s1.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Value, null, fst.CreateState());
            // From s1, "voice-" input leads to s3.
            State <AnnotatedStringData, int> s3 = s1.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("voice-").Value, fst.CreateState());

            // From s3, two null-input routes reach s2: one outputs a "." boundary, the other outputs
            // a "+" boundary and then a "." boundary via an intermediate state.
            s3.Arcs.Add(null, FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(".").Value, s2);
            s3.Arcs.Add(null, FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo("+").Value, fst.CreateState())
            .Arcs.Add(null, FeatureStruct.New(PhoneticFeatSys).Symbol(Bdry).Feature("strRep").EqualTo(".").Value, s2);
            // From s2, "cons+" input leads to a new accepting state.
            s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cons+").Value, fst.CreateAcceptingState());

            dfst = fst.Determinize();

            // "camp" is expected to come out as "cap".
            data = CreateStringData("camp");
            Assert.That(dfst.Transduce(data, data.Annotations.First, null, true, true, true, out result), Is.True);
            Assert.That(result.Output.String, Is.EqualTo("cap"));

            // For "casp" the overload with an enumerable out parameter is used, because two outputs
            // are expected: one with "." and one with "+." inserted before the final "p".
            data = CreateStringData("casp");
            IEnumerable <FstResult <AnnotatedStringData, int> > results;

            Assert.That(dfst.Transduce(data, data.Annotations.First, null, true, true, true, out results), Is.True);
            FstResult <AnnotatedStringData, int>[] resultsArray = results.ToArray();
            Assert.That(resultsArray.Length, Is.EqualTo(2));
            // EquivalentTo compares contents without regard to order.
            Assert.That(resultsArray.Select(r => r.Output.String), Is.EquivalentTo(new [] { "cas+.p", "cas.p" }));
        }