/// <summary>
/// Inserts the character registered for <paramref name="fs"/> into <paramref name="data"/>
/// at the end offset of <paramref name="ann"/>.
/// </summary>
/// <param name="data">The string data that receives the inserted character.</param>
/// <param name="ann">The annotation whose span end is used as the insertion offset.</param>
/// <param name="fs">The feature structure used to look up the character in <c>_characters</c>.</param>
/// <returns>A span created from the annotation's end offset to that offset plus one.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Single</c> when zero or more than one entry of <c>_characters</c> value-equals <paramref name="fs"/>.
/// </exception>
public Span<int> Insert(AnnotatedStringData data, Annotation<int> ann, FeatureStruct fs)
{
    // The lookup happens first, so a failed lookup leaves the data untouched.
    KeyValuePair<char, FeatureStruct> entry = _characters.Single(pair => pair.Value.ValueEquals(fs));

    data.Insert(ann.Span.End, entry.Key.ToString(CultureInfo.InvariantCulture));

    // Re-read the annotation's span after the insertion, exactly as the one-expression form did.
    Span<int> annSpan = ann.Span;
    return _spanFactory.Create(annSpan.End, annSpan.End + 1);
}
/// <summary>
/// Segments the word "called" (constructed with stem index 0 and stem length argument 4) and
/// verifies the resulting shape nodes plus the stem and suffix annotations.
/// </summary>
public void SegmentWord()
{
    var word = new Word("called", 0, 4, new Meaning("gloss", "category"));

    _segmenter.Segment(word);

    Assert.That(word.Shape.Count, Is.EqualTo(6));

    // Materialize the nodes once so individual positions can be addressed by index.
    ShapeNode[] nodes = word.Shape.ToArray();
    ShapeNode firstNode = nodes[0];
    ShapeNode lastNode = nodes[nodes.Length - 1];

    AssertShapeNodeEqual(firstNode, "c", CogFeatureSystem.ConsonantType);
    AssertShapeNodeEqual(nodes[1], "a", CogFeatureSystem.VowelType);
    AssertShapeNodeEqual(nodes[4], "e", CogFeatureSystem.VowelType);
    AssertShapeNodeEqual(lastNode, "d", CogFeatureSystem.ConsonantType);

    // The stem is expected to cover nodes 0..3 ("call").
    Annotation<ShapeNode> stem = word.Shape.Annotations.Single(ann => ann.Type() == CogFeatureSystem.StemType);
    Span<ShapeNode> expectedStemSpan = _spanFactory.Create(firstNode, nodes[3]);
    Assert.That(stem.Span, Is.EqualTo(expectedStemSpan));

    // The suffix is expected to cover nodes 4..5 ("ed").
    Annotation<ShapeNode> suffix = word.Shape.Annotations.Single(ann => ann.Type() == CogFeatureSystem.SuffixType);
    Span<ShapeNode> expectedSuffixSpan = _spanFactory.Create(nodes[4], lastNode);
    Assert.That(suffix.Span, Is.EqualTo(expectedSuffixSpan));
}
/// <summary>
/// Tests whether the match fails to cover the stem of the matched word, ignoring any
/// tone-letter nodes at the end of the stem.
/// </summary>
/// <param name="match">The match whose span is compared against the input word's stem.</param>
/// <returns>
/// <c>true</c> when <paramref name="match"/>'s span does NOT contain the span running from the
/// stem's start node to its last non-tone-letter node; otherwise <c>false</c>.
/// </returns>
private bool CheckStemWholeWord(Match<Word, ShapeNode> match)
{
    Annotation<ShapeNode> stem = match.Input.Stem;

    // Walk backwards from the stem's end past every tone-letter node.
    ShapeNode lastNode = stem.Span.End;
    for (; lastNode.Type() == CogFeatureSystem.ToneLetterType; lastNode = lastNode.Prev)
    {
    }

    Span<ShapeNode> trimmedStem = _spanFactory.Create(stem.Span.Start, lastNode);
    bool matchCoversStem = match.Span.Contains(trimmedStem);
    return !matchCoversStem;
}
/// <summary>
/// Converts a raw FST result into a <c>Match</c>, resolving the span of the entire match and
/// one group capture per named group of the automaton.
/// </summary>
/// <param name="input">The data the pattern was matched against.</param>
/// <param name="match">The FST result supplying registers, ID, variable bindings and next annotation.</param>
/// <returns>
/// A match over the entire-match span. Every group other than <c>EntireMatch</c> gets a capture;
/// groups without usable offsets, or whose span is not contained in the match span, get the
/// span factory's empty span.
/// </returns>
private Match<TData, TOffset> CreatePatternMatch(TData input, FstResult<TData, TOffset> match)
{
    TOffset wholeStart, wholeEnd;
    _fsa.GetOffsets(EntireMatch, match.Registers, out wholeStart, out wholeEnd);
    Span<TOffset> matchSpan = _spanFactory.Create(wholeStart, wholeEnd);

    var captures = new List<GroupCapture<TOffset>>();
    foreach (string groupName in _fsa.GroupNames)
    {
        if (groupName != EntireMatch)
        {
            GroupCapture<TOffset> capture = null;
            TOffset groupStart, groupEnd;

            // Only offsets that were recorded, form a valid span and describe a range are candidates.
            if (_fsa.GetOffsets(groupName, match.Registers, out groupStart, out groupEnd)
                && _spanFactory.IsValidSpan(groupStart, groupEnd)
                && _spanFactory.IsRange(groupStart, groupEnd))
            {
                Span<TOffset> groupSpan = _spanFactory.Create(groupStart, groupEnd);
                if (matchSpan.Contains(groupSpan))
                {
                    capture = new GroupCapture<TOffset>(groupName, groupSpan);
                }
            }

            // Fall back to an empty capture so every group name is represented in the result.
            captures.Add(capture ?? new GroupCapture<TOffset>(groupName, _spanFactory.Empty));
        }
    }

    // A missing or empty ID yields no pattern path; otherwise the ID is split on '*'.
    string[] patternPath;
    if (string.IsNullOrEmpty(match.ID))
    {
        patternPath = new string[0];
    }
    else
    {
        patternPath = match.ID.Split('*');
    }

    return new Match<TData, TOffset>(this, matchSpan, input, captures, patternPath,
        match.VariableBindings, match.NextAnnotation);
}
/// <summary>
/// Swaps the node ranges captured by the left and right groups of the match and rebuilds the
/// morph annotations that overlapped the affected region.
/// </summary>
/// <param name="targetMatch">The match providing the group captures and the word being modified.</param>
/// <param name="span">Not read by this method.</param>
/// <param name="varBindings">Not read by this method.</param>
public void ApplyRhs(Match<Word, ShapeNode> targetMatch, Span<ShapeNode> span, VariableBindings varBindings)
{
    // Find the earliest start and the latest end over all group captures.
    ShapeNode regionStart = null;
    ShapeNode regionEnd = null;
    foreach (GroupCapture<ShapeNode> capture in targetMatch.GroupCaptures)
    {
        if (regionStart == null || capture.Span.Start.CompareTo(regionStart) < 0)
        {
            regionStart = capture.Span.Start;
        }
        if (regionEnd == null || capture.Span.End.CompareTo(regionEnd) > 0)
        {
            regionEnd = capture.Span.End;
        }
    }
    Debug.Assert(regionStart != null && regionEnd != null);

    // Snapshot each overlapping morph together with its children before detaching it.
    var affectedMorphs = targetMatch.Input.Morphs
        .Where(morphAnn => morphAnn.Span.Overlaps(regionStart, regionEnd))
        .Select(morphAnn => new { Annotation = morphAnn, Children = morphAnn.Children.ToList() })
        .ToArray();
    foreach (var affected in affectedMorphs)
    {
        affected.Annotation.Remove();
    }

    GroupCapture<ShapeNode> left = targetMatch.GroupCaptures[_leftGroupName];
    GroupCapture<ShapeNode> right = targetMatch.GroupCaptures[_rightGroupName];

    // Remember the node preceding the right group before any node is moved.
    ShapeNode anchorBeforeRight = right.Span.Start.Prev;
    MoveNodesAfter(targetMatch.Input.Shape, left.Span.End, right.Span);
    MoveNodesAfter(targetMatch.Input.Shape, anchorBeforeRight, left.Span);

    // Re-create each removed morph annotation over the new positions of its children.
    foreach (var affected in affectedMorphs)
    {
        Annotation<ShapeNode>[] ordered = affected.Children.OrderBy(child => child.Span).ToArray();
        Annotation<ShapeNode> firstChild = ordered[0];
        Annotation<ShapeNode> lastChild = ordered[ordered.Length - 1];

        // NOTE(review): the new span ends at the last child's Span.Start, not Span.End.
        // Presumably children are single-node annotations, making the two equal - confirm.
        Span<ShapeNode> rebuiltSpan = _spanFactory.Create(firstChild.Span.Start, lastChild.Span.Start);

        var rebuilt = new Annotation<ShapeNode>(rebuiltSpan, affected.Annotation.FeatureStruct);
        rebuilt.Children.AddRange(affected.Children);
        targetMatch.Input.Annotations.Add(rebuilt, false);
    }
}
/// <summary>
/// Populates <c>m_shape</c> from the segments, analyses and text tags of <c>m_para</c>.
/// </summary>
/// <remarks>
/// The shape starts with a word boundary node. Each wordform-only analysis becomes one "word"
/// node. Each other non-punctuation analysis becomes one "morph" node per morph bundle,
/// wrapped in a "word" annotation and followed by a word boundary; if it has no morph bundles
/// it becomes a single "word" node. Every segment is closed with a segment boundary node.
/// Text tags are then added as "ttag" annotations spanning their first and last analysis.
/// </remarks>
/// <param name="spanFactory">Factory used to create the spans of the tag annotations.</param>
/// <param name="featSys">Feature system supplying the "type", "cat", "infl" and per-writing-system features.</param>
/// <returns>
/// <c>false</c> as soon as an analysis's form differs (ignoring case) from the baseline text it
/// covers; otherwise <c>true</c>.
/// </returns>
private bool GenerateShape(SpanFactory<ShapeNode> spanFactory, FeatureSystem featSys)
{
    m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("wordBdry").Value);

    var typeFeature = featSys.GetFeature<SymbolicFeature>("type");
    var categoryFeature = featSys.GetFeature<SymbolicFeature>("cat");
    var inflectionFeature = featSys.GetFeature<ComplexFeature>("infl");
    var annotationsBySegment = new Dictionary<ISegment, List<Annotation<ShapeNode>>>();

    foreach (ISegment segment in m_para.SegmentsOS)
    {
        var segmentAnnotations = new List<Annotation<ShapeNode>>();
        foreach (Tuple<IAnalysis, int, int> occurrence in segment.GetAnalysesAndOffsets())
        {
            // check if analyses are out-of-sync with the baseline
            ITsString baseline = m_para.Contents.GetSubstring(occurrence.Item2, occurrence.Item3);
            ITsString analysisForm = occurrence.Item1.GetForm(baseline.get_WritingSystemAt(0));
            bool inSync = baseline.Text.Equals(analysisForm.Text, StringComparison.InvariantCultureIgnoreCase);
            if (!inSync)
            {
                return false;
            }

            // Case 1: a bare wordform becomes a single "word" node.
            var bareWordform = occurrence.Item1 as IWfiWordform;
            if (bareWordform != null)
            {
                var wordformFS = new FeatureStruct();
                wordformFS.AddValue(typeFeature, typeFeature.PossibleSymbols["word"]);
                foreach (int ws in bareWordform.Form.AvailableWritingSystemIds)
                {
                    StringFeature formFeature;
                    if (!featSys.TryGetFeature(string.Format("form-{0}", ws), out formFeature))
                    {
                        continue;
                    }
                    wordformFS.AddValue(formFeature, bareWordform.Form.get_String(ws).Text);
                }

                ShapeNode wordformNode = m_shape.Add(wordformFS);
                wordformNode.Annotation.Data = occurrence;
                segmentAnnotations.Add(wordformNode.Annotation);
                continue;
            }

            // Case 2: punctuation gets no node; a null placeholder keeps list indices aligned
            // with the analysis indices used by the tag loop below.
            if (occurrence.Item1 is IPunctuationForm)
            {
                segmentAnnotations.Add(null);
                continue;
            }

            // Case 3: an analysis (or gloss); one "morph" node per morph bundle.
            FeatureStruct combinedInflFS = null;
            IWfiAnalysis wfiAnalysis = occurrence.Item1.Analysis;
            ShapeNode firstMorphNode = null;
            foreach (IWfiMorphBundle bundle in wfiAnalysis.MorphBundlesOS)
            {
                var morphFS = new FeatureStruct();
                morphFS.AddValue(typeFeature, typeFeature.PossibleSymbols["morph"]);

                // Forms: union of the bundle's own writing systems and those of its morph, if any.
                IEnumerable<int> bundleWritingSystems = bundle.Form.AvailableWritingSystemIds;
                IEnumerable<int> morphWritingSystems;
                if (bundle.MorphRA == null)
                {
                    morphWritingSystems = Enumerable.Empty<int>();
                }
                else
                {
                    morphWritingSystems = bundle.MorphRA.Form.AvailableWritingSystemIds;
                }

                foreach (int ws in bundleWritingSystems.Union(morphWritingSystems))
                {
                    StringFeature formFeature;
                    if (!featSys.TryGetFeature(string.Format("form-{0}", ws), out formFeature))
                    {
                        continue;
                    }

                    IEnumerable<string> forms = Enumerable.Empty<string>();
                    ITsString bundleForm = bundle.Form.StringOrNull(ws);
                    if (bundleForm != null)
                    {
                        forms = forms.Concat(bundleForm.Text);
                    }
                    if (bundle.MorphRA != null)
                    {
                        ITsString morphForm = bundle.MorphRA.Form.StringOrNull(ws);
                        if (morphForm != null)
                        {
                            forms = forms.Concat(morphForm.Text);
                        }
                    }
                    morphFS.AddValue(formFeature, forms.Distinct());
                }

                // Glosses of the bundle's sense.
                if (bundle.SenseRA != null)
                {
                    foreach (int ws in bundle.SenseRA.Gloss.AvailableWritingSystemIds)
                    {
                        StringFeature glossFeature;
                        if (featSys.TryGetFeature(string.Format("gloss-{0}", ws), out glossFeature))
                        {
                            morphFS.AddValue(glossFeature, bundle.SenseRA.Gloss.get_String(ws).Text);
                        }
                    }
                }

                // Lexeme forms of the entry that owns the bundle's morph.
                if (bundle.MorphRA != null)
                {
                    var entry = (ILexEntry)bundle.MorphRA.Owner;
                    foreach (int ws in entry.LexemeFormOA.Form.AvailableWritingSystemIds)
                    {
                        StringFeature entryFeature;
                        if (featSys.TryGetFeature(string.Format("entry-{0}", ws), out entryFeature))
                        {
                            morphFS.AddValue(entryFeature, entry.LexemeFormOA.Form.get_String(ws).Text);
                        }
                    }
                }

                // Category symbols and inflection features from the MSA.
                if (bundle.MsaRA != null && bundle.MsaRA.ComponentsRS != null)
                {
                    FeatureSymbol[] catSymbols = GetHvoOfMsaPartOfSpeech(bundle.MsaRA)
                        .Select(hvo => categoryFeature.PossibleSymbols[hvo.ToString(CultureInfo.InvariantCulture)])
                        .ToArray();
                    if (catSymbols.Length > 0)
                    {
                        morphFS.AddValue(categoryFeature, catSymbols);
                    }

                    var inflFS = GetFeatureStruct(featSys, bundle.MsaRA);
                    if (inflFS != null)
                    {
                        morphFS.AddValue(inflectionFeature, inflFS);

                        // Accumulate the word-level inflection: clone the first, union in the rest.
                        if (combinedInflFS == null)
                        {
                            combinedInflFS = inflFS.DeepClone();
                        }
                        else
                        {
                            combinedInflFS.Union(inflFS);
                        }
                    }
                }

                ShapeNode morphNode = m_shape.Add(morphFS);
                if (firstMorphNode == null)
                {
                    firstMorphNode = morphNode;
                }
            }

            // Word-level feature structure for the analysis as a whole.
            var analysisFS = new FeatureStruct();
            analysisFS.AddValue(typeFeature, typeFeature.PossibleSymbols["word"]);
            if (wfiAnalysis.CategoryRA != null)
            {
                analysisFS.AddValue(categoryFeature,
                    categoryFeature.PossibleSymbols[wfiAnalysis.CategoryRA.Hvo.ToString(CultureInfo.InvariantCulture)]);
            }
            if (combinedInflFS != null && !combinedInflFS.IsEmpty)
            {
                analysisFS.AddValue(inflectionFeature, combinedInflFS);
            }

            IWfiWordform owningWordform = wfiAnalysis.Wordform;
            foreach (int ws in owningWordform.Form.AvailableWritingSystemIds)
            {
                StringFeature formFeature;
                if (featSys.TryGetFeature(string.Format("form-{0}", ws), out formFeature))
                {
                    analysisFS.AddValue(formFeature, owningWordform.Form.get_String(ws).Text);
                }
            }

            var wfiGloss = occurrence.Item1 as IWfiGloss;
            if (wfiGloss != null)
            {
                foreach (int ws in wfiGloss.Form.AvailableWritingSystemIds)
                {
                    StringFeature glossFeature;
                    if (featSys.TryGetFeature(string.Format("gloss-{0}", ws), out glossFeature))
                    {
                        analysisFS.AddValue(glossFeature, wfiGloss.Form.get_String(ws).Text);
                    }
                }
            }

            Annotation<ShapeNode> wordAnnotation;
            if (firstMorphNode == null)
            {
                // No morph bundles: the word is represented by a node of its own.
                ShapeNode wordNode = m_shape.Add(analysisFS);
                wordAnnotation = wordNode.Annotation;
            }
            else
            {
                // Wrap the morph nodes in a word annotation, then close the word with a boundary.
                wordAnnotation = m_shape.Annotations.Add(firstMorphNode, m_shape.Last, analysisFS);
                m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("wordBdry").Value);
            }
            wordAnnotation.Data = occurrence;
            segmentAnnotations.Add(wordAnnotation);
        }

        annotationsBySegment[segment] = segmentAnnotations;
        m_shape.Add(FeatureStruct.New(featSys).Symbol("bdry").Symbol("segBdry").Value);
    }

    foreach (ITextTag tag in m_para.OwnerOfClass<IStText>().TagsOC)
    {
        // skip invalid tags
        // TODO: should these tags be cleaned up somewhere?
        bool indicesUsable = tag.BeginAnalysisIndex < tag.BeginSegmentRA.AnalysesRS.Count
            && tag.EndAnalysisIndex < tag.EndSegmentRA.AnalysesRS.Count
            && tag.BeginAnalysisIndex <= tag.EndAnalysisIndex;
        if (!indicesUsable)
        {
            continue;
        }

        // Tags that reference segments not processed above are ignored.
        List<Annotation<ShapeNode>> beginAnnotations, endAnnotations;
        if (!annotationsBySegment.TryGetValue(tag.BeginSegmentRA, out beginAnnotations))
        {
            continue;
        }
        if (!annotationsBySegment.TryGetValue(tag.EndSegmentRA, out endAnnotations))
        {
            continue;
        }

        Annotation<ShapeNode> firstTagged = beginAnnotations[tag.BeginAnalysisIndex];
        Annotation<ShapeNode> lastTagged = endAnnotations[tag.EndAnalysisIndex];
        ICmPossibility tagType = tag.TagRA;
        if (tagType == null || firstTagged == null || lastTagged == null)
        {
            continue; // guard against LT-14549 crash
        }

        Span<ShapeNode> tagSpan = spanFactory.Create(firstTagged.Span.Start, lastTagged.Span.End);
        var tagFS = FeatureStruct.New(featSys)
            .Symbol("ttag")
            .Symbol(tagType.Hvo.ToString(CultureInfo.InvariantCulture))
            .Value;
        Annotation<ShapeNode> tagAnnotation = new Annotation<ShapeNode>(tagSpan, tagFS);
        tagAnnotation.Data = tag;
        m_shape.Annotations.Add(tagAnnotation, false);
    }

    return true;
}
/// <summary>
/// Exercises <c>AnnotationList.Add</c> without and with subsumption, checking the list count,
/// the position of each added annotation and the resulting child hierarchy.
/// </summary>
public void Add()
{
    AnnotationList<int> annList = new AnnotationList<int>(_spanFactory);

    // ---- add without subsumption ----

    // add to empty list
    Annotation<int> middle = new Annotation<int>(_spanFactory.Create(49, 50), FeatureStruct.New().Value);
    annList.Add(middle, false);
    Assert.AreEqual(1, annList.Count);
    Assert.AreSame(middle, annList.First);

    // add to beginning of list
    Annotation<int> head = new Annotation<int>(_spanFactory.Create(0, 1), FeatureStruct.New().Value);
    annList.Add(head, false);
    Assert.AreEqual(2, annList.Count);
    Assert.AreSame(head, annList.First);

    // add to end of list
    Annotation<int> tail = new Annotation<int>(_spanFactory.Create(99, 100), FeatureStruct.New().Value);
    annList.Add(tail, false);
    Assert.AreEqual(3, annList.Count);
    Assert.AreSame(tail, annList.Last);

    // add to middle of list
    Annotation<int> inner = new Annotation<int>(_spanFactory.Create(24, 25), FeatureStruct.New().Value);
    annList.Add(inner, false);
    Assert.AreEqual(4, annList.Count);
    Assert.AreSame(inner, annList.ElementAt(1));

    // add containing annotation
    Annotation<int> container = new Annotation<int>(_spanFactory.Create(0, 100), FeatureStruct.New().Value);
    annList.Add(container, false);
    Assert.AreEqual(5, annList.Count);
    Assert.AreSame(container, annList.First());

    // add contained annotation
    Annotation<int> contained = new Annotation<int>(_spanFactory.Create(9, 10), FeatureStruct.New().Value);
    annList.Add(contained, false);
    Assert.AreEqual(6, annList.Count);
    Assert.AreSame(contained, annList.ElementAt(2));

    annList.Clear();

    // ---- add with subsumption ----

    // add to empty list
    Annotation<int> subMiddle = new Annotation<int>(_spanFactory.Create(49, 50), FeatureStruct.New().Value);
    annList.Add(subMiddle);
    Assert.AreEqual(1, annList.Count);
    Assert.AreSame(subMiddle, annList.First);

    // add to beginning of list
    Annotation<int> subHead = new Annotation<int>(_spanFactory.Create(0, 1), FeatureStruct.New().Value);
    annList.Add(subHead);
    Assert.AreEqual(2, annList.Count);
    Assert.AreSame(subHead, annList.First);

    // add to end of list
    Annotation<int> subTail = new Annotation<int>(_spanFactory.Create(99, 100), FeatureStruct.New().Value);
    annList.Add(subTail);
    Assert.AreEqual(3, annList.Count);
    Assert.AreSame(subTail, annList.Last);

    // add to middle of list
    Annotation<int> subInner = new Annotation<int>(_spanFactory.Create(24, 25), FeatureStruct.New().Value);
    annList.Add(subInner);
    Assert.AreEqual(4, annList.Count);
    Assert.AreSame(subInner, annList.ElementAt(1));

    // add containing annotation: the four existing annotations become its children
    Annotation<int> subContainer = new Annotation<int>(_spanFactory.Create(0, 100), FeatureStruct.New().Value);
    annList.Add(subContainer);
    Assert.AreEqual(1, annList.Count);
    Assert.AreSame(subContainer, annList.First());
    Assert.AreEqual(4, subContainer.Children.Count);

    // add contained annotation: it is placed among the container's children
    Annotation<int> subContained = new Annotation<int>(_spanFactory.Create(9, 10), FeatureStruct.New().Value);
    annList.Add(subContained);
    Assert.AreEqual(1, annList.Count);
    Assert.AreEqual(5, annList.First.Children.Count);
    Assert.AreSame(subContained, annList.First.Children.ElementAt(1));

    annList.Clear();

    // ---- add an annotation that already has children ----

    // six adjacent unit spans: (0,1), (1,2), ... (5,6)
    for (int offset = 0; offset < 6; offset++)
    {
        annList.Add(offset, offset + 1, FeatureStruct.New().Value);
    }
    Assert.AreEqual(6, annList.Count);

    Annotation<int> parent = new Annotation<int>(_spanFactory.Create(1, 5), FeatureStruct.New().Value);
    parent.Children.Add(1, 3, FeatureStruct.New().Value);
    parent.Children.Add(3, 5, FeatureStruct.New().Value);
    Assert.AreEqual(2, parent.Children.Count);

    annList.Add(parent);
    Assert.AreEqual(3, annList.Count);
    Assert.AreSame(parent, annList.ElementAt(1));
    Assert.AreEqual(2, parent.Children.Count);
    Assert.AreEqual(2, parent.Children.First.Children.Count);
    Assert.AreEqual(2, parent.Children.Last.Children.Count);
}