Ejemplo n.º 1
0
        public Span <int> Insert(AnnotatedStringData data, Annotation <int> ann, FeatureStruct fs)
        {
            KeyValuePair <char, FeatureStruct> character = _characters.Single(kvp => kvp.Value.ValueEquals(fs));

            data.Insert(ann.Span.End, character.Key.ToString(CultureInfo.InvariantCulture));
            return(_spanFactory.Create(ann.Span.End, ann.Span.End + 1));
        }
Ejemplo n.º 2
0
        public void SegmentWord()
        {
            var meaning = new Meaning("gloss", "category");
            var word    = new Word("called", 0, 4, meaning);

            _segmenter.Segment(word);
            Assert.That(word.Shape.Count, Is.EqualTo(6));
            AssertShapeNodeEqual(word.Shape.First, "c", CogFeatureSystem.ConsonantType);
            AssertShapeNodeEqual(word.Shape.ElementAt(1), "a", CogFeatureSystem.VowelType);
            AssertShapeNodeEqual(word.Shape.ElementAt(4), "e", CogFeatureSystem.VowelType);
            AssertShapeNodeEqual(word.Shape.Last, "d", CogFeatureSystem.ConsonantType);
            Annotation <ShapeNode> stemAnn = word.Shape.Annotations.Single(a => a.Type() == CogFeatureSystem.StemType);

            Assert.That(stemAnn.Span, Is.EqualTo(_spanFactory.Create(word.Shape.First, word.Shape.ElementAt(3))));
            Annotation <ShapeNode> suffixAnn = word.Shape.Annotations.Single(a => a.Type() == CogFeatureSystem.SuffixType);

            Assert.That(suffixAnn.Span, Is.EqualTo(_spanFactory.Create(word.Shape.ElementAt(4), word.Shape.Last)));
        }
Ejemplo n.º 3
0
        private bool CheckStemWholeWord(Match <Word, ShapeNode> match)
        {
            Annotation <ShapeNode> stemAnn = match.Input.Stem;
            ShapeNode end = stemAnn.Span.End;

            while (end.Type() == CogFeatureSystem.ToneLetterType)
            {
                end = end.Prev;
            }
            return(!match.Span.Contains(_spanFactory.Create(stemAnn.Span.Start, end)));
        }
Ejemplo n.º 4
0
        private Match <TData, TOffset> CreatePatternMatch(TData input, FstResult <TData, TOffset> match)
        {
            TOffset matchStart, matchEnd;

            _fsa.GetOffsets(EntireMatch, match.Registers, out matchStart, out matchEnd);
            Span <TOffset> matchSpan     = _spanFactory.Create(matchStart, matchEnd);
            var            groupCaptures = new List <GroupCapture <TOffset> >();

            foreach (string groupName in _fsa.GroupNames)
            {
                if (groupName == EntireMatch)
                {
                    continue;
                }

                GroupCapture <TOffset> groupCapture = null;
                TOffset start, end;
                if (_fsa.GetOffsets(groupName, match.Registers, out start, out end))
                {
                    if (_spanFactory.IsValidSpan(start, end) && _spanFactory.IsRange(start, end))
                    {
                        Span <TOffset> span = _spanFactory.Create(start, end);
                        if (matchSpan.Contains(span))
                        {
                            groupCapture = new GroupCapture <TOffset>(groupName, span);
                        }
                    }
                }

                if (groupCapture == null)
                {
                    groupCapture = new GroupCapture <TOffset>(groupName, _spanFactory.Empty);
                }
                groupCaptures.Add(groupCapture);
            }

            return(new Match <TData, TOffset>(this, matchSpan, input, groupCaptures,
                                              string.IsNullOrEmpty(match.ID) ? new string[0] : match.ID.Split('*'), match.VariableBindings, match.NextAnnotation));
        }
        public void ApplyRhs(Match <Word, ShapeNode> targetMatch, Span <ShapeNode> span, VariableBindings varBindings)
        {
            ShapeNode start = null, end = null;

            foreach (GroupCapture <ShapeNode> gc in targetMatch.GroupCaptures)
            {
                if (start == null || gc.Span.Start.CompareTo(start) < 0)
                {
                    start = gc.Span.Start;
                }
                if (end == null || gc.Span.End.CompareTo(end) > 0)
                {
                    end = gc.Span.End;
                }
            }
            Debug.Assert(start != null && end != null);

            var morphs = targetMatch.Input.Morphs.Where(ann => ann.Span.Overlaps(start, end))
                         .Select(ann => new { Annotation = ann, Children = ann.Children.ToList() }).ToArray();

            foreach (var morph in morphs)
            {
                morph.Annotation.Remove();
            }

            GroupCapture <ShapeNode> leftGroup  = targetMatch.GroupCaptures[_leftGroupName];
            GroupCapture <ShapeNode> rightGroup = targetMatch.GroupCaptures[_rightGroupName];

            ShapeNode beforeRightGroup = rightGroup.Span.Start.Prev;

            MoveNodesAfter(targetMatch.Input.Shape, leftGroup.Span.End, rightGroup.Span);
            MoveNodesAfter(targetMatch.Input.Shape, beforeRightGroup, leftGroup.Span);

            foreach (var morph in morphs)
            {
                Annotation <ShapeNode>[] children = morph.Children.OrderBy(ann => ann.Span).ToArray();
                var newMorphAnn = new Annotation <ShapeNode>(_spanFactory.Create(children[0].Span.Start, children[children.Length - 1].Span.Start), morph.Annotation.FeatureStruct);
                newMorphAnn.Children.AddRange(morph.Children);
                targetMatch.Input.Annotations.Add(newMorphAnn, false);
            }
        }
Ejemplo n.º 6
0
        private bool GenerateShape(SpanFactory <ShapeNode> spanFactory, FeatureSystem featSys)
        {
            m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("wordBdry").Value);
            var typeFeat = featSys.GetFeature <SymbolicFeature>("type");
            var catFeat  = featSys.GetFeature <SymbolicFeature>("cat");
            var inflFeat = featSys.GetFeature <ComplexFeature>("infl");
            var segments = new Dictionary <ISegment, List <Annotation <ShapeNode> > >();

            foreach (ISegment segment in m_para.SegmentsOS)
            {
                var annotations = new List <Annotation <ShapeNode> >();
                foreach (Tuple <IAnalysis, int, int> analysis in segment.GetAnalysesAndOffsets())
                {
                    // check if analyses are out-of-sync with the baseline
                    ITsString baselineStr = m_para.Contents.GetSubstring(analysis.Item2, analysis.Item3);
                    ITsString formStr     = analysis.Item1.GetForm(baselineStr.get_WritingSystemAt(0));
                    if (!baselineStr.Text.Equals(formStr.Text, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(false);
                    }

                    var wordform = analysis.Item1 as IWfiWordform;
                    if (wordform != null)
                    {
                        var wordFS = new FeatureStruct();
                        wordFS.AddValue(typeFeat, typeFeat.PossibleSymbols["word"]);
                        foreach (int ws in wordform.Form.AvailableWritingSystemIds)
                        {
                            StringFeature strFeat;
                            if (featSys.TryGetFeature(string.Format("form-{0}", ws), out strFeat))
                            {
                                wordFS.AddValue(strFeat, wordform.Form.get_String(ws).Text);
                            }
                        }
                        ShapeNode node = m_shape.Add(wordFS);
                        node.Annotation.Data = analysis;
                        annotations.Add(node.Annotation);
                    }
                    else
                    {
                        if (analysis.Item1 is IPunctuationForm)
                        {
                            annotations.Add(null);
                            continue;
                        }

                        FeatureStruct wordInflFS    = null;
                        IWfiAnalysis  wanalysis     = analysis.Item1.Analysis;
                        ShapeNode     analysisStart = null;
                        foreach (IWfiMorphBundle mb in wanalysis.MorphBundlesOS)
                        {
                            var morphFS = new FeatureStruct();
                            morphFS.AddValue(typeFeat, typeFeat.PossibleSymbols["morph"]);
                            foreach (int ws in mb.Form.AvailableWritingSystemIds.Union(mb.MorphRA == null ? Enumerable.Empty <int>() : mb.MorphRA.Form.AvailableWritingSystemIds))
                            {
                                StringFeature strFeat;
                                if (!featSys.TryGetFeature(string.Format("form-{0}", ws), out strFeat))
                                {
                                    continue;
                                }

                                IEnumerable <string> forms  = Enumerable.Empty <string>();
                                ITsString            mbForm = mb.Form.StringOrNull(ws);
                                if (mbForm != null)
                                {
                                    forms = forms.Concat(mbForm.Text);
                                }
                                if (mb.MorphRA != null)
                                {
                                    ITsString morphForm = mb.MorphRA.Form.StringOrNull(ws);
                                    if (morphForm != null)
                                    {
                                        forms = forms.Concat(morphForm.Text);
                                    }
                                }

                                morphFS.AddValue(strFeat, forms.Distinct());
                            }
                            if (mb.SenseRA != null)
                            {
                                foreach (int ws in mb.SenseRA.Gloss.AvailableWritingSystemIds)
                                {
                                    StringFeature strFeat;
                                    if (featSys.TryGetFeature(string.Format("gloss-{0}", ws), out strFeat))
                                    {
                                        morphFS.AddValue(strFeat, mb.SenseRA.Gloss.get_String(ws).Text);
                                    }
                                }
                            }

                            if (mb.MorphRA != null)
                            {
                                var entry = (ILexEntry)mb.MorphRA.Owner;
                                foreach (int ws in entry.LexemeFormOA.Form.AvailableWritingSystemIds)
                                {
                                    StringFeature strFeat;
                                    if (featSys.TryGetFeature(string.Format("entry-{0}", ws), out strFeat))
                                    {
                                        morphFS.AddValue(strFeat, entry.LexemeFormOA.Form.get_String(ws).Text);
                                    }
                                }
                            }

                            if (mb.MsaRA != null && mb.MsaRA.ComponentsRS != null)
                            {
                                FeatureSymbol[] catSymbols = GetHvoOfMsaPartOfSpeech(mb.MsaRA).Select(hvo => catFeat.PossibleSymbols[hvo.ToString(CultureInfo.InvariantCulture)]).ToArray();
                                if (catSymbols.Length > 0)
                                {
                                    morphFS.AddValue(catFeat, catSymbols);
                                }
                                var inflFS = GetFeatureStruct(featSys, mb.MsaRA);
                                if (inflFS != null)
                                {
                                    morphFS.AddValue(inflFeat, inflFS);
                                    if (wordInflFS == null)
                                    {
                                        wordInflFS = inflFS.DeepClone();
                                    }
                                    else
                                    {
                                        wordInflFS.Union(inflFS);
                                    }
                                }
                            }

                            ShapeNode node = m_shape.Add(morphFS);
                            if (analysisStart == null)
                            {
                                analysisStart = node;
                            }
                        }

                        var wordFS = new FeatureStruct();
                        wordFS.AddValue(typeFeat, typeFeat.PossibleSymbols["word"]);
                        if (wanalysis.CategoryRA != null)
                        {
                            wordFS.AddValue(catFeat, catFeat.PossibleSymbols[wanalysis.CategoryRA.Hvo.ToString(CultureInfo.InvariantCulture)]);
                        }
                        if (wordInflFS != null && !wordInflFS.IsEmpty)
                        {
                            wordFS.AddValue(inflFeat, wordInflFS);
                        }
                        wordform = wanalysis.Wordform;
                        foreach (int ws in wordform.Form.AvailableWritingSystemIds)
                        {
                            StringFeature strFeat;
                            if (featSys.TryGetFeature(string.Format("form-{0}", ws), out strFeat))
                            {
                                wordFS.AddValue(strFeat, wordform.Form.get_String(ws).Text);
                            }
                        }
                        var gloss = analysis.Item1 as IWfiGloss;
                        if (gloss != null)
                        {
                            foreach (int ws in gloss.Form.AvailableWritingSystemIds)
                            {
                                StringFeature strFeat;
                                if (featSys.TryGetFeature(string.Format("gloss-{0}", ws), out strFeat))
                                {
                                    wordFS.AddValue(strFeat, gloss.Form.get_String(ws).Text);
                                }
                            }
                        }
                        Annotation <ShapeNode> ann;
                        if (analysisStart != null)
                        {
                            ann = m_shape.Annotations.Add(analysisStart, m_shape.Last, wordFS);
                            m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("wordBdry").Value);
                        }
                        else
                        {
                            ShapeNode node = m_shape.Add(wordFS);
                            ann = node.Annotation;
                        }
                        ann.Data = analysis;
                        annotations.Add(ann);
                    }
                }

                segments[segment] = annotations;
                m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("segBdry").Value);
            }

            foreach (ITextTag tag in m_para.OwnerOfClass <IStText>().TagsOC)
            {
                // skip invalid tags
                // TODO: should these tags be cleaned up somewhere?
                if (tag.BeginAnalysisIndex >= tag.BeginSegmentRA.AnalysesRS.Count || tag.EndAnalysisIndex >= tag.EndSegmentRA.AnalysesRS.Count ||
                    tag.BeginAnalysisIndex > tag.EndAnalysisIndex)
                {
                    continue;
                }
                List <Annotation <ShapeNode> > beginSegment, endSegment;
                if (!segments.TryGetValue(tag.BeginSegmentRA, out beginSegment) || !segments.TryGetValue(tag.EndSegmentRA, out endSegment))
                {
                    continue;
                }
                Annotation <ShapeNode> beginAnnotation = beginSegment[tag.BeginAnalysisIndex];
                Annotation <ShapeNode> endAnnotation   = endSegment[tag.EndAnalysisIndex];
                ICmPossibility         tagType         = tag.TagRA;
                if (tagType == null || beginAnnotation == null || endAnnotation == null)
                {
                    continue;                     // guard against LT-14549 crash
                }
                Annotation <ShapeNode> tagAnn = new Annotation <ShapeNode>(spanFactory.Create(beginAnnotation.Span.Start, endAnnotation.Span.End),
                                                                           FeatureStruct.New(featSys).Symbol("ttag").Symbol(tagType.Hvo.ToString(CultureInfo.InvariantCulture)).Value)
                {
                    Data = tag
                };
                m_shape.Annotations.Add(tagAnn, false);
            }

            return(true);
        }
Ejemplo n.º 7
0
        public void Add()
        {
            var annList = new AnnotationList <int>(_spanFactory);
            // add without subsumption
            // add to empty list
            var a = new Annotation <int>(_spanFactory.Create(49, 50), FeatureStruct.New().Value);

            annList.Add(a, false);
            Assert.AreEqual(1, annList.Count);
            Assert.AreSame(a, annList.First);
            // add to beginning of list
            a = new Annotation <int>(_spanFactory.Create(0, 1), FeatureStruct.New().Value);
            annList.Add(a, false);
            Assert.AreEqual(2, annList.Count);
            Assert.AreSame(a, annList.First);
            // add to end of list
            a = new Annotation <int>(_spanFactory.Create(99, 100), FeatureStruct.New().Value);
            annList.Add(a, false);
            Assert.AreEqual(3, annList.Count);
            Assert.AreSame(a, annList.Last);
            // add to middle of list
            a = new Annotation <int>(_spanFactory.Create(24, 25), FeatureStruct.New().Value);
            annList.Add(a, false);
            Assert.AreEqual(4, annList.Count);
            Assert.AreSame(a, annList.ElementAt(1));
            // add containing annotation
            a = new Annotation <int>(_spanFactory.Create(0, 100), FeatureStruct.New().Value);
            annList.Add(a, false);
            Assert.AreEqual(5, annList.Count);
            Assert.AreSame(a, annList.First());
            // add contained annotation
            a = new Annotation <int>(_spanFactory.Create(9, 10), FeatureStruct.New().Value);
            annList.Add(a, false);
            Assert.AreEqual(6, annList.Count);
            Assert.AreSame(a, annList.ElementAt(2));

            annList.Clear();

            // add with subsumption
            // add to empty list
            a = new Annotation <int>(_spanFactory.Create(49, 50), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(1, annList.Count);
            Assert.AreSame(a, annList.First);
            // add to beginning of list
            a = new Annotation <int>(_spanFactory.Create(0, 1), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(2, annList.Count);
            Assert.AreSame(a, annList.First);
            // add to end of list
            a = new Annotation <int>(_spanFactory.Create(99, 100), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(3, annList.Count);
            Assert.AreSame(a, annList.Last);
            // add to middle of list
            a = new Annotation <int>(_spanFactory.Create(24, 25), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(4, annList.Count);
            Assert.AreSame(a, annList.ElementAt(1));
            // add containing annotation
            a = new Annotation <int>(_spanFactory.Create(0, 100), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(1, annList.Count);
            Assert.AreSame(a, annList.First());
            Assert.AreEqual(4, a.Children.Count);
            // add contained annotation
            a = new Annotation <int>(_spanFactory.Create(9, 10), FeatureStruct.New().Value);
            annList.Add(a);
            Assert.AreEqual(1, annList.Count);
            Assert.AreEqual(5, annList.First.Children.Count);
            Assert.AreSame(a, annList.First.Children.ElementAt(1));

            annList.Clear();

            annList.Add(0, 1, FeatureStruct.New().Value);
            annList.Add(1, 2, FeatureStruct.New().Value);
            annList.Add(2, 3, FeatureStruct.New().Value);
            annList.Add(3, 4, FeatureStruct.New().Value);
            annList.Add(4, 5, FeatureStruct.New().Value);
            annList.Add(5, 6, FeatureStruct.New().Value);
            Assert.AreEqual(6, annList.Count);
            a = new Annotation <int>(_spanFactory.Create(1, 5), FeatureStruct.New().Value);
            a.Children.Add(1, 3, FeatureStruct.New().Value);
            a.Children.Add(3, 5, FeatureStruct.New().Value);
            Assert.AreEqual(2, a.Children.Count);
            annList.Add(a);
            Assert.AreEqual(3, annList.Count);
            Assert.AreSame(a, annList.ElementAt(1));
            Assert.AreEqual(2, a.Children.Count);
            Assert.AreEqual(2, a.Children.First.Children.Count);
            Assert.AreEqual(2, a.Children.Last.Children.Count);
        }