示例#1
0
        /// <summary>
        /// Builds a pattern rule from a batch of two rule specs and asserts that applying it to
        /// the input "fazk", annotated as a noun, yields at least one result.
        /// </summary>
        /// <remarks>
        /// Both specs share a single pattern made of the groups "leftEnv", "target" and
        /// "rightEnv". The first spec carries an applicability predicate that requires the word
        /// annotation to be unifiable with "verb"; the second spec is constructed without a
        /// predicate. NOTE(review): presumably only the second spec fires for this noun input;
        /// confirm against the BatchPatternRuleSpec implementation.
        /// </remarks>
        public void Batch()
        {
            // leftEnv binds the "voice" value to variable "a"; rightEnv requires a different value.
            var sharedPattern = Pattern<StringData, int>.New()
                .Group("leftEnv", left => left.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").EqualToVariable("a").Value))
                .Group("target", center => center.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons-")
                        .Symbol("low+").Value))
                .Group("rightEnv", right => right.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").Not.EqualToVariable("a").Value))
                .Value;

            // Spec 1: rewrites the target to low-/mid-, guarded by the "verb" predicate.
            var verbOnlySpec = new DefaultPatternRuleSpec<StringData, int>(
                sharedPattern,
                (PatternRule<StringData, int> patternRule, Match<StringData, int> m, out StringData result) =>
                {
                    GroupCapture<int> captured = m.GroupCaptures["target"];
                    foreach (Annotation<int> node in m.Input.Annotations.GetNodes(captured.Span))
                    {
                        // A fresh feature struct is built for every annotation, as before.
                        var delta = FeatureStruct.New(PhoneticFeatSys)
                            .Symbol("low-")
                            .Symbol("mid-").Value;
                        node.FeatureStruct.PriorityUnion(delta);
                    }
                    result = m.Input;
                    return captured.Span.End;
                },
                data => data.Annotations
                    .Single(a => ((FeatureSymbol)a.FeatureStruct.GetValue(Type)) == Word)
                    .FeatureStruct
                    .IsUnifiable(FeatureStruct.New(WordFeatSys).Symbol("verb").Value));

            // Spec 2: rewrites the target to low-/mid+, with no applicability predicate.
            var unconditionalSpec = new DefaultPatternRuleSpec<StringData, int>(
                sharedPattern,
                (PatternRule<StringData, int> patternRule, Match<StringData, int> m, out StringData result) =>
                {
                    GroupCapture<int> captured = m.GroupCaptures["target"];
                    foreach (Annotation<int> node in m.Input.Annotations.GetNodes(captured.Span))
                    {
                        var delta = FeatureStruct.New(PhoneticFeatSys)
                            .Symbol("low-")
                            .Symbol("mid+").Value;
                        node.FeatureStruct.PriorityUnion(delta);
                    }
                    result = m.Input;
                    return captured.Span.End;
                });

            var batch = new BatchPatternRuleSpec<StringData, int>(new[] { verbOnlySpec, unconditionalSpec });
            var batchRule = new PatternRule<StringData, int>(SpanFactory, batch);

            // Tag the whole input span as a noun word before applying the rule.
            StringData fazk = CreateStringData("fazk");
            fazk.Annotations.Add(fazk.Span, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);

            bool producedOutput = batchRule.Apply(fazk).Any();
            Assert.IsTrue(producedOutput);
        }
示例#2
0
文件: Stemmer.cs 项目: JRetza/cog
        /// <summary>
        /// Creates one pattern rule spec per affix, gathers them into a batch spec and applies
        /// the resulting rule to every word whose shape contains at least one node.
        /// </summary>
        /// <param name="dir">
        /// Direction handed to the matcher. It also decides where the anchor constraint goes:
        /// before the affix segments for <c>LeftToRight</c>, after them for <c>RightToLeft</c>.
        /// </param>
        /// <param name="words">Words the rule is applied to; words with an empty shape are skipped.</param>
        /// <param name="affixes">Affixes whose shape nodes and categories define the rule specs.</param>
        private void StemWords(Direction dir, IEnumerable<Word> words, IEnumerable<Affix> affixes)
        {
            bool anchorFirst = dir == Direction.LeftToRight;
            bool anchorLast = dir == Direction.RightToLeft;

            var batchSpec = new BatchPatternRuleSpec<Word, ShapeNode>();
            foreach (Affix affix in affixes)
            {
                var affixPattern = new Pattern<Word, ShapeNode>();
                affixPattern.Acceptable = CheckStemWholeWord;

                if (anchorFirst)
                {
                    var leadingAnchor = new Constraint<Word, ShapeNode>(
                        FeatureStruct.New().Symbol(CogFeatureSystem.AnchorType).Value);
                    affixPattern.Children.Add(leadingAnchor);
                }

                // Each affix segment is wrapped by an optional boundary before it and an
                // optional tone letter after it.
                foreach (ShapeNode segment in affix.Shape)
                {
                    var optionalBoundary = new Quantifier<Word, ShapeNode>(0, 1,
                        new Constraint<Word, ShapeNode>(FeatureStruct.New().Symbol(CogFeatureSystem.BoundaryType).Value));
                    affixPattern.Children.Add(optionalBoundary);

                    var segmentConstraint = new Constraint<Word, ShapeNode>(segment.Annotation.FeatureStruct.Clone());
                    affixPattern.Children.Add(segmentConstraint);

                    var optionalToneLetter = new Quantifier<Word, ShapeNode>(0, 1,
                        new Constraint<Word, ShapeNode>(FeatureStruct.New().Symbol(CogFeatureSystem.ToneLetterType).Value));
                    affixPattern.Children.Add(optionalToneLetter);
                }

                if (anchorLast)
                {
                    var trailingAnchor = new Constraint<Word, ShapeNode>(
                        FeatureStruct.New().Symbol(CogFeatureSystem.AnchorType).Value);
                    affixPattern.Children.Add(trailingAnchor);
                }

                // Copy the category into a per-iteration local so the predicate captures this affix's value.
                string affixCategory = affix.Category;
                var affixSpec = new DefaultPatternRuleSpec<Word, ShapeNode>(
                    affixPattern,
                    MarkStem,
                    w => affixCategory == null || w.Meaning.Category == affixCategory);
                batchSpec.RuleSpecs.Add(affixSpec);
            }

            var settings = new MatcherSettings<ShapeNode>();
            settings.Direction = dir;
            // Only annotations of these five types pass the matcher's filter.
            settings.Filter = annotation => annotation.Type().IsOneOf(
                CogFeatureSystem.ConsonantType,
                CogFeatureSystem.VowelType,
                CogFeatureSystem.AnchorType,
                CogFeatureSystem.ToneLetterType,
                CogFeatureSystem.BoundaryType);

            var stemRule = new PatternRule<Word, ShapeNode>(batchSpec, settings);

            IEnumerable<Word> nonEmptyWords = words.Where(candidate => candidate.Shape.Count > 0);
            foreach (Word candidate in nonEmptyWords)
            {
                stemRule.Apply(candidate);
            }
        }
示例#3
0
        /// <summary>
        /// Combines two rule specs into a batch, wraps the batch in a pattern rule and asserts
        /// that applying it to the noun-tagged input "fazk" returns at least one result.
        /// </summary>
        /// <remarks>
        /// The specs share one pattern with the groups "leftEnv", "target" and "rightEnv". The
        /// first spec is restricted by a predicate that requires the word annotation to be
        /// unifiable with "verb"; the second spec is created without any predicate.
        /// NOTE(review): presumably only the second spec fires for this noun input; confirm
        /// against the BatchPatternRuleSpec implementation.
        /// </remarks>
        public void Batch()
        {
            // The "voice" value matched in leftEnv is bound to variable "a"; rightEnv must differ from it.
            var envPattern = Pattern<AnnotatedStringData, int>.New()
                .Group("leftEnv", before => before.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").EqualToVariable("a").Value))
                .Group("target", focus => focus.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons-")
                        .Symbol("low+").Value))
                .Group("rightEnv", after => after.Annotation(
                    FeatureStruct.New(PhoneticFeatSys)
                        .Symbol(Seg)
                        .Symbol("cons+")
                        .Feature("voice").Not.EqualToVariable("a").Value))
                .Value;

            // First spec: sets low-/mid- on the target, guarded by the "verb" predicate.
            var verbSpec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
                envPattern,
                (PatternRule<AnnotatedStringData, int> owner, Match<AnnotatedStringData, int> found, out AnnotatedStringData produced) =>
                {
                    GroupCapture<int> targetCapture = found.GroupCaptures["target"];
                    foreach (Annotation<int> targetNode in found.Input.Annotations.GetNodes(targetCapture.Span))
                    {
                        // Built anew for each annotation, matching the original per-iteration construction.
                        var loweredMid = FeatureStruct.New(PhoneticFeatSys)
                            .Symbol("low-")
                            .Symbol("mid-").Value;
                        targetNode.FeatureStruct.PriorityUnion(loweredMid);
                    }
                    produced = found.Input;
                    return targetCapture.Span.End;
                },
                candidate => candidate.Annotations
                    .Single(entry => ((FeatureSymbol) entry.FeatureStruct.GetValue(Type)) == Word)
                    .FeatureStruct
                    .IsUnifiable(FeatureStruct.New(WordFeatSys).Symbol("verb").Value));

            // Second spec: sets low-/mid+ on the target, with no predicate.
            var fallbackSpec = new DefaultPatternRuleSpec<AnnotatedStringData, int>(
                envPattern,
                (PatternRule<AnnotatedStringData, int> owner, Match<AnnotatedStringData, int> found, out AnnotatedStringData produced) =>
                {
                    GroupCapture<int> targetCapture = found.GroupCaptures["target"];
                    foreach (Annotation<int> targetNode in found.Input.Annotations.GetNodes(targetCapture.Span))
                    {
                        var raisedMid = FeatureStruct.New(PhoneticFeatSys)
                            .Symbol("low-")
                            .Symbol("mid+").Value;
                        targetNode.FeatureStruct.PriorityUnion(raisedMid);
                    }
                    produced = found.Input;
                    return targetCapture.Span.End;
                });

            var combinedSpec = new BatchPatternRuleSpec<AnnotatedStringData, int>(new[] {verbSpec, fallbackSpec});
            var combinedRule = new PatternRule<AnnotatedStringData, int>(SpanFactory, combinedSpec);

            // Annotate the full span of the input as a noun word.
            AnnotatedStringData nounInput = CreateStringData("fazk");
            nounInput.Annotations.Add(nounInput.Span, FeatureStruct.New(WordFeatSys).Symbol(Word).Symbol("noun").Value);

            bool anyResult = combinedRule.Apply(nounInput).Any();
            Assert.IsTrue(anyResult);
        }