예제 #1
0
파일: Stemmer.cs 프로젝트: JRetza/cog
        /// <summary>
        /// Reports whether the stem of the matched word, with any tone letters at its end
        /// left out, is NOT contained in the matched range.
        /// </summary>
        /// <param name="match">Match whose input word supplies the stem annotation.</param>
        /// <returns>
        /// <c>false</c> when the match range contains the trimmed stem range;
        /// <c>true</c> otherwise.
        /// </returns>
        private bool CheckStemWholeWord(Match<Word, ShapeNode> match)
        {
            Annotation<ShapeNode> stem = match.Input.Stem;

            // Back up over tone letters so they are not counted as part of the stem's extent.
            ShapeNode lastStemNode = stem.Range.End;
            for (; lastStemNode.Type() == CogFeatureSystem.ToneLetterType; lastStemNode = lastStemNode.Prev)
            {
                // The loop header does all the work.
            }

            var trimmedStem = Range<ShapeNode>.Create(stem.Range.Start, lastStemNode);
            bool stemIsCovered = match.Range.Contains(trimmedStem);
            return !stemIsCovered;
        }
예제 #2
0
        /// <summary>
        /// Processes one syllable after deciding how much of its leading consonant cluster
        /// stays with it as the onset. Consonants that are split off the front of the cluster
        /// are attached to the end of what has already been written to
        /// <paramref name="newShape"/>; the rest is handed to <c>ProcessSyllable</c>.
        /// </summary>
        /// <param name="startNode">First node of the syllable in the source shape.</param>
        /// <param name="endNode">Last node of the syllable in the source shape.</param>
        /// <param name="newShape">Shape that receives the processed nodes and annotations.</param>
        private void ProcessSyllableWithMaximalOnset(ShapeNode startNode, ShapeNode endNode, Shape newShape)
        {
            ShapeNode node       = startNode;
            ShapeNode onsetStart = node;

            // Advance over the run of consonants at the start of the syllable.
            while (node.Type() == CogFeatureSystem.ConsonantType && node != endNode.Next)
            {
                node = node.Next;
            }
            ShapeNode onsetEnd = node.Prev;

            // Only a cluster of two or more consonants can be split.
            if (onsetStart != node && onsetStart != onsetEnd)
            {
                ShapeNode n = onsetStart;
                // A cluster that begins at the first node of its list is left whole.
                if (onsetStart != onsetEnd.List.First)
                {
                    // Try progressively shorter tails of the cluster (n..onsetEnd) and stop at the
                    // first one found in _initialOnsets.
                    // NOTE(review): _initialOnsets is presumably a lazily built set of onsets seen
                    // word-initially - confirm against its declaration.
                    for (; n != onsetEnd.Next; n = n.Next)
                    {
                        string onsetStr = n.GetNodes(onsetEnd).StrRep();
                        if (_initialOnsets.Value.Contains(onsetStr))
                        {
                            break;
                        }
                    }

                    // TODO: ambiguous onset, what should we do? For now, we just assume maximal onset
                    // (no tail matched, so the whole cluster is kept as the onset)
                    if (n == onsetEnd.Next)
                    {
                        n = onsetStart;
                    }
                }
                // n now marks where the onset begins; anything in onsetStart..n.Prev is split off.
                if (n != onsetStart)
                {
                    if (onsetStart.Prev.Type() == CogFeatureSystem.ConsonantType)
                    {
                        // The source node before the cluster is a consonant: join the split-off
                        // consonants to the last consonant already present in newShape.
                        CombineWith(newShape.GetLast(nd => nd.Type() == CogFeatureSystem.ConsonantType), onsetStart, n.Prev);
                    }
                    else
                    {
                        // Otherwise add the split-off consonants as a coda, then replace the last
                        // syllable annotation with one that runs through the new last node.
                        Combine(CogFeatureSystem.Coda, newShape, onsetStart, n.Prev);
                        Annotation <ShapeNode> prevSyllableAnn = newShape.Annotations.Last(ann => ann.Type() == CogFeatureSystem.SyllableType);
                        prevSyllableAnn.Remove();
                        newShape.Annotations.Add(prevSyllableAnn.Range.Start, newShape.Last, FeatureStruct.New().Symbol(CogFeatureSystem.SyllableType).Value);
                    }
                    // The current syllable now starts at the chosen onset.
                    startNode = n;
                }
            }

            ProcessSyllable(startNode, endNode, newShape);
        }
예제 #3
0
        /// <summary>
        /// Attaches the segments from <paramref name="start"/> through <paramref name="end"/>
        /// to <paramref name="node"/>. When combining is enabled for the type of
        /// <paramref name="node"/> (vowel or consonant), the segments are merged into it as a
        /// single complex segment; otherwise each one is cloned and inserted after it.
        /// </summary>
        /// <param name="node">Existing node that receives the segments.</param>
        /// <param name="start">First source node to attach.</param>
        /// <param name="end">Last source node to attach (inclusive).</param>
        private void CombineWith(ShapeNode node, ShapeNode start, ShapeNode end)
        {
            bool mergeIntoNode = (CombineVowels && node.Type() == CogFeatureSystem.VowelType)
                || (CombineConsonants && node.Type() == CogFeatureSystem.ConsonantType);

            if (!mergeIntoNode)
            {
                // Keep the segments separate: chain a clone of each one after the previous.
                for (ShapeNode src = start; src != end.Next; src = src.Next)
                {
                    var copy = src.Clone();
                    node.AddAfter(copy);
                    node = copy;
                }
                return;
            }

            // Build the combined representations, starting with the receiving node's own.
            var combinedRep = new StringBuilder();
            var combinedOrigRep = new StringBuilder();
            combinedRep.Append(node.StrRep());
            combinedOrigRep.Append(node.OriginalStrRep());
            for (ShapeNode src = start; src != end.Next; src = src.Next)
            {
                combinedRep.Append(src.StrRep());
                combinedOrigRep.Append(src.OriginalStrRep());
                node.Annotation.FeatureStruct.Add(src.Annotation.FeatureStruct);
            }

            var mergedFS = node.Annotation.FeatureStruct;
            mergedFS.AddValue(CogFeatureSystem.StrRep, combinedRep.ToString());
            mergedFS.AddValue(CogFeatureSystem.OriginalStrRep, combinedOrigRep.ToString());
            mergedFS.AddValue(CogFeatureSystem.SegmentType, CogFeatureSystem.Complex);

            // The "First" value comes from `start`: reuse its own First value when it is already
            // complex, otherwise copy the features that CogFeatureSystem does not contain.
            FeatureStruct firstSegmentFS;
            if (!start.IsComplex())
            {
                firstSegmentFS = new FeatureStruct();
                var startFS = start.Annotation.FeatureStruct;
                foreach (Feature feature in startFS.Features)
                {
                    if (CogFeatureSystem.Instance.ContainsFeature(feature))
                    {
                        continue;
                    }
                    firstSegmentFS.AddValue(feature, startFS.GetValue(feature));
                }
            }
            else
            {
                firstSegmentFS = start.Annotation.FeatureStruct.GetValue(CogFeatureSystem.First);
            }
            mergedFS.AddValue(CogFeatureSystem.First, firstSegmentFS);
        }
예제 #4
0
        /// <summary>
        /// Tests whether <paramref name="target"/> is one of the normalized segments, either
        /// on its own or in a word-boundary form ("#x" after an anchor, "x#" before one).
        /// </summary>
        /// <param name="leftNode">Node preceding the target; may be <c>null</c>.</param>
        /// <param name="target">The n-gram being tested.</param>
        /// <param name="rightNode">Node following the target; may be <c>null</c>.</param>
        /// <returns><c>true</c> if any applicable form is in the normalized segment set.</returns>
        public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
        {
            string rawRep = target.ToString();
            string key = _ignoreModifiers ? StripModifiers(rawRep) : rawRep;

            // Boundary-marked form at the left edge.
            bool precededByAnchor = leftNode != null && leftNode.Type() == CogFeatureSystem.AnchorType;
            if (precededByAnchor && _normalizedSegments.Contains(string.Format("#{0}", key)))
            {
                return true;
            }

            // Boundary-marked form at the right edge.
            bool followedByAnchor = rightNode != null && rightNode.Type() == CogFeatureSystem.AnchorType;
            if (followedByAnchor && _normalizedSegments.Contains(string.Format("{0}#", key)))
            {
                return true;
            }

            // Fall back to the plain form.
            return _normalizedSegments.Contains(key);
        }
예제 #5
0
        /// <summary>
        /// Returns the string representation of the run of consonants at the start of the
        /// annotation's range.
        /// </summary>
        /// <param name="ann">Annotation to inspect; may be <c>null</c>.</param>
        /// <returns>
        /// The leading consonant run as a string, or <c>null</c> when <paramref name="ann"/>
        /// is <c>null</c> or its range does not begin with a consonant.
        /// </returns>
        private string GetInitialOnset(Annotation<ShapeNode> ann)
        {
            if (ann == null)
            {
                return null;
            }

            ShapeNode first = ann.Range.Start;

            // Move forward while still on a consonant that lies inside the annotation.
            ShapeNode cursor = first;
            for (; cursor.Type() == CogFeatureSystem.ConsonantType && ann.Range.Contains(cursor); cursor = cursor.Next)
            {
                // The loop header does all the work.
            }

            // If the cursor never moved, there is no onset.
            return cursor == first
                ? null
                : first.GetNodes(cursor.Prev).StrRep();
        }
예제 #6
0
        /// <summary>
        /// Copies the syllable spanning <paramref name="startNode"/> through
        /// <paramref name="endNode"/> into <paramref name="newShape"/> and adds a syllable
        /// annotation over the nodes it produced.
        /// </summary>
        /// <remarks>
        /// The span is consumed in order: leading boundary nodes, a run of consonants (onset),
        /// a run of vowels (nucleus), a run of consonants (coda), then any remaining nodes.
        /// </remarks>
        /// <param name="startNode">First node of the syllable in the source shape.</param>
        /// <param name="endNode">Last node of the syllable in the source shape.</param>
        /// <param name="newShape">Shape that receives the copied nodes and the annotation.</param>
        protected void ProcessSyllable(ShapeNode startNode, ShapeNode endNode, Shape newShape)
        {
            ShapeNode syllableFirst = null;
            ShapeNode cursor = startNode;

            // Leading boundary nodes are copied across one by one.
            for (; cursor.Type() == CogFeatureSystem.BoundaryType && cursor != endNode.Next; cursor = cursor.Next)
            {
                ShapeNode boundaryCopy = cursor.DeepClone();
                newShape.Add(boundaryCopy);
                if (syllableFirst == null)
                {
                    syllableFirst = boundaryCopy;
                }
            }

            // Onset: the consonant run that follows.
            ShapeNode onsetFirst = cursor;
            for (; cursor.Type() == CogFeatureSystem.ConsonantType && cursor != endNode.Next; cursor = cursor.Next)
            {
            }
            ShapeNode onsetLast = cursor.Prev;

            if (onsetFirst != cursor)
            {
                ShapeNode onsetNode = Combine(CogFeatureSystem.Onset, newShape, onsetFirst, onsetLast);
                if (syllableFirst == null)
                {
                    syllableFirst = onsetNode;
                }
            }

            // Nucleus: the vowel run, if the span has not been used up.
            if (cursor != endNode.Next)
            {
                ShapeNode nucleusFirst = cursor;
                for (; cursor.Type() == CogFeatureSystem.VowelType && cursor != endNode.Next; cursor = cursor.Next)
                {
                }
                ShapeNode nucleusLast = cursor.Prev;

                ShapeNode nucleusNode = Combine(CogFeatureSystem.Nucleus, newShape, nucleusFirst, nucleusLast);
                if (syllableFirst == null)
                {
                    syllableFirst = nucleusNode;
                }
            }

            // Coda: a trailing consonant run, added only when it is non-empty.
            if (cursor != endNode.Next)
            {
                ShapeNode codaFirst = cursor;
                for (; cursor.Type() == CogFeatureSystem.ConsonantType && cursor != endNode.Next; cursor = cursor.Next)
                {
                }
                if (codaFirst != cursor)
                {
                    ShapeNode codaLast = cursor.Prev;
                    Combine(CogFeatureSystem.Coda, newShape, codaFirst, codaLast);
                }
            }

            // Whatever is left in the span is copied unchanged.
            for (; cursor != endNode.Next; cursor = cursor.Next)
            {
                newShape.Add(cursor.DeepClone());
            }

            newShape.Annotations.Add(
                syllableFirst,
                newShape.Last,
                FeatureStruct.New().Symbol(CogFeatureSystem.SyllableType).Value);
        }
예제 #7
0
 /// <summary>
 /// Accepts a node only when its type is consonant, vowel or anchor.
 /// </summary>
 /// <param name="n">Node to test.</param>
 /// <returns><c>true</c> for consonant, vowel and anchor nodes; otherwise <c>false</c>.</returns>
 private bool NodeFilter(ShapeNode n)
 {
     var nodeType = n.Type();
     return nodeType.IsOneOf(
         CogFeatureSystem.ConsonantType,
         CogFeatureSystem.VowelType,
         CogFeatureSystem.AnchorType);
 }
예제 #8
0
 /// <summary>
 /// Asserts that a node has the expected string representation and the expected type.
 /// </summary>
 /// <param name="actualNode">Node under test.</param>
 /// <param name="expectedStrRep">String representation the node should have.</param>
 /// <param name="expectedType">Type symbol the node should have.</param>
 private void AssertShapeNodeEqual(ShapeNode actualNode, string expectedStrRep, FeatureSymbol expectedType)
 {
     // The string representation is checked first, then the type.
     var actualStrRep = actualNode.StrRep();
     Assert.That(actualStrRep, Is.EqualTo(expectedStrRep));

     var actualType = actualNode.Type();
     Assert.That(actualType, Is.EqualTo(expectedType));
 }
예제 #9
0
 /// <summary>
 /// Checks the neighbouring nodes against an environment. A side whose environment is
 /// <c>null</c> matches any node; otherwise it must equal that node's type.
 /// </summary>
 /// <param name="env">Environment holding the left and right constraints.</param>
 /// <param name="leftNode">Node on the left side.</param>
 /// <param name="rightNode">Node on the right side.</param>
 /// <returns><c>true</c> when both sides match.</returns>
 private bool CheckEnvironment(Environment env, ShapeNode leftNode, ShapeNode rightNode)
 {
     bool leftMatches = env.LeftEnvironment == null || env.LeftEnvironment == leftNode.Type();
     if (!leftMatches)
     {
         return false;
     }

     // The right side is only examined once the left side has matched.
     return env.RightEnvironment == null || env.RightEnvironment == rightNode.Type();
 }
예제 #10
0
 /// <summary>
 /// Filter that keeps vowel, consonant and anchor nodes and rejects everything else.
 /// </summary>
 /// <param name="node">Node to test.</param>
 /// <returns><c>true</c> when the node's type is vowel, consonant or anchor.</returns>
 private static bool NodeFilter(ShapeNode node)
 {
     var type = node.Type();
     bool accepted = type.IsOneOf(
         CogFeatureSystem.VowelType,
         CogFeatureSystem.ConsonantType,
         CogFeatureSystem.AnchorType);
     return accepted;
 }
예제 #11
0
 /// <summary>
 /// Decides whether the nodes on either side satisfy <paramref name="env"/>. A
 /// <c>null</c> constraint on one side places no restriction on that side.
 /// </summary>
 /// <param name="env">Environment holding the left and right constraints.</param>
 /// <param name="leftNode">Node on the left side.</param>
 /// <param name="rightNode">Node on the right side.</param>
 /// <returns><c>true</c> when both the left and right constraints are met.</returns>
 private bool CheckEnvironment(Environment env, ShapeNode leftNode, ShapeNode rightNode)
 {
     bool leftOk = env.LeftEnvironment == null || env.LeftEnvironment == leftNode.Type();

     // Evaluate the right side only if the left side passed.
     bool rightOk = leftOk
         ? env.RightEnvironment == null || env.RightEnvironment == rightNode.Type()
         : false;

     return rightOk;
 }
예제 #12
0
        /// <summary>
        /// Determines whether <paramref name="target"/> is in the normalized segment set,
        /// trying the anchored forms "#x" (left neighbour is an anchor) and "x#" (right
        /// neighbour is an anchor) before the plain form.
        /// </summary>
        /// <param name="leftNode">Node preceding the target; may be <c>null</c>.</param>
        /// <param name="target">The n-gram being tested.</param>
        /// <param name="rightNode">Node following the target; may be <c>null</c>.</param>
        /// <returns><c>true</c> if any of the applicable forms is found.</returns>
        public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
        {
            string segmentText = target.ToString();
            if (_ignoreModifiers)
            {
                segmentText = StripModifiers(segmentText);
            }

            // Short-circuit evaluation keeps the lookup order: left-anchored, right-anchored, plain.
            return (leftNode != null
                    && leftNode.Type() == CogFeatureSystem.AnchorType
                    && _normalizedSegments.Contains(string.Format("#{0}", segmentText)))
                || (rightNode != null
                    && rightNode.Type() == CogFeatureSystem.AnchorType
                    && _normalizedSegments.Contains(string.Format("{0}#", segmentText)))
                || _normalizedSegments.Contains(segmentText);
        }
예제 #13
0
파일: SegmenterTests.cs 프로젝트: rmunn/cog
 /// <summary>
 /// Test helper: verifies a node's string representation and then its type.
 /// </summary>
 /// <param name="actualNode">Node produced by the code under test.</param>
 /// <param name="expectedStrRep">Expected string representation.</param>
 /// <param name="expectedType">Expected type symbol.</param>
 private void AssertShapeNodeEqual(ShapeNode actualNode, string expectedStrRep, FeatureSymbol expectedType)
 {
     var strRep = actualNode.StrRep();
     Assert.That(strRep, Is.EqualTo(expectedStrRep));

     // The type is read only after the string representation has passed.
     var type = actualNode.Type();
     Assert.That(type, Is.EqualTo(expectedType));
 }
예제 #14
0
파일: SspSyllabifier.cs 프로젝트: rmunn/cog
        /// <summary>
        /// Attaches the segments from <paramref name="start"/> through <paramref name="end"/>
        /// to <paramref name="node"/>: merged into it as one complex segment when combining is
        /// enabled for the node's type (vowel or consonant), otherwise deep-cloned and
        /// inserted after it one by one.
        /// </summary>
        /// <param name="node">Existing node that receives the segments.</param>
        /// <param name="start">First source node to attach.</param>
        /// <param name="end">Last source node to attach (inclusive).</param>
        private void CombineWith(ShapeNode node, ShapeNode start, ShapeNode end)
        {
            bool combine = (CombineVowels && node.Type() == CogFeatureSystem.VowelType)
                || (CombineConsonants && node.Type() == CogFeatureSystem.ConsonantType);

            if (combine)
            {
                // Accumulate the combined representations, beginning with the node's own.
                var rep = new StringBuilder();
                var origRep = new StringBuilder();
                rep.Append(node.StrRep());
                origRep.Append(node.OriginalStrRep());
                for (ShapeNode cur = start; cur != end.Next; cur = cur.Next)
                {
                    rep.Append(cur.StrRep());
                    origRep.Append(cur.OriginalStrRep());
                    node.Annotation.FeatureStruct.Add(cur.Annotation.FeatureStruct);
                }

                var targetFS = node.Annotation.FeatureStruct;
                targetFS.AddValue(CogFeatureSystem.StrRep, rep.ToString());
                targetFS.AddValue(CogFeatureSystem.OriginalStrRep, origRep.ToString());
                targetFS.AddValue(CogFeatureSystem.SegmentType, CogFeatureSystem.Complex);

                // The "First" value is taken from `start`: its existing First value when it is
                // already complex, else a copy of the features CogFeatureSystem does not contain.
                FeatureStruct firstFS;
                if (start.IsComplex())
                {
                    firstFS = start.Annotation.FeatureStruct.GetValue(CogFeatureSystem.First);
                }
                else
                {
                    firstFS = new FeatureStruct();
                    var outsideCogSystem = start.Annotation.FeatureStruct.Features
                        .Where(f => !CogFeatureSystem.Instance.ContainsFeature(f));
                    foreach (Feature feature in outsideCogSystem)
                    {
                        firstFS.AddValue(feature, start.Annotation.FeatureStruct.GetValue(feature));
                    }
                }
                targetFS.AddValue(CogFeatureSystem.First, firstFS);
                return;
            }

            // Not combining: append a deep clone of each segment after the previous one.
            for (ShapeNode cur = start; cur != end.Next; cur = cur.Next)
            {
                var copy = cur.DeepClone();
                node.AddAfter(copy);
                node = copy;
            }
        }