Пример #1
0
        /// <summary>
        /// Marks money expressions in the NER layer of the given tree. A Turkish leaf that has no NER layer
        /// yet and whose word, lower-cased with the "tr" culture, satisfies <c>Word.IsMoney</c> is labelled
        /// "MONEY". The leaves directly before it are labelled "MONEY" too, for as long as their parent
        /// node is named "CD".
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are inspected and updated in place.</param>
        protected override void AutoDetectMoney(ParseTreeDrawable parseTree)
        {
            ParseNodeDrawable rootNode = (ParseNodeDrawable)parseTree.GetRoot();
            List <ParseNodeDrawable> leaves = new NodeDrawableCollector(rootNode, new IsTurkishLeafNode()).Collect();

            for (int index = 0; index < leaves.Count; index++)
            {
                ParseNodeDrawable leaf = leaves[index];
                if (leaf.LayerExists(ViewLayerType.NER))
                {
                    continue;
                }

                String lowered = leaf.GetLayerData(ViewLayerType.TURKISH_WORD).ToLower(new CultureInfo("tr"));
                if (!Word.IsMoney(lowered))
                {
                    continue;
                }

                leaf.GetLayerInfo().SetLayerData(ViewLayerType.NER, "MONEY");

                // Walk backwards over the uninterrupted run of preceding leaves whose parent is "CD".
                for (int back = index - 1; back >= 0; back--)
                {
                    ParseNodeDrawable candidate = leaves[back];
                    if (!candidate.GetParent().GetData().GetName().Equals("CD"))
                    {
                        break;
                    }

                    candidate.GetLayerInfo().SetLayerData(ViewLayerType.NER, "MONEY");
                }
            }
        }
        /// <summary>
        /// Runs <c>MostFrequentTreeAutoSemantic</c> over every tree loaded from "../../../new-trees/" and
        /// compares the SEMANTICS layer of each Turkish leaf with the leaf at the same position in the
        /// corresponding tree from "../../../old-trees/". Asserts 475 leaves in total and 260 matches.
        /// </summary>
        public void TestAccuracy()
        {
            int matched = 0, seen = 0;
            var annotator = new MostFrequentTreeAutoSemantic(wordNet, fsm);
            var newTrees = new TreeBankDrawable("../../../new-trees/");
            var oldTrees = new TreeBankDrawable("../../../old-trees/");

            for (var treeIndex = 0; treeIndex < newTrees.Size(); treeIndex++)
            {
                var labelledTree = newTrees.Get(treeIndex);
                var referenceTree = oldTrees.Get(treeIndex);
                annotator.AutoSemantic(labelledTree);

                var labelledLeaves = new NodeDrawableCollector(
                    (ParseNodeDrawable)labelledTree.GetRoot(), new IsTurkishLeafNode()).Collect();
                var referenceLeaves = new NodeDrawableCollector(
                    (ParseNodeDrawable)referenceTree.GetRoot(), new IsTurkishLeafNode()).Collect();

                for (var leafIndex = 0; leafIndex < labelledLeaves.Count; leafIndex++)
                {
                    seen++;
                    var labelledLeaf = labelledLeaves[leafIndex];
                    var referenceLeaf = referenceLeaves[leafIndex];

                    // A leaf without a semantic label can never count as a match.
                    var produced = labelledLeaf.GetLayerData(ViewLayerType.SEMANTICS);
                    if (produced == null)
                    {
                        continue;
                    }

                    if (produced.Equals(referenceLeaf.GetLayerData(ViewLayerType.SEMANTICS)))
                    {
                        matched++;
                    }
                }
            }

            Assert.AreEqual(475, seen);
            Assert.AreEqual(260, matched);
        }
 /// <summary>
 /// Builds an annotated sentence from the leaves of a parse tree and passes the collected
 /// word/node pairs to <c>ConstructDependenciesFromTree</c>.
 /// </summary>
 /// <param name="parseTree">Tree to convert; may be null.</param>
 /// <param name="models">Models forwarded unchanged to <c>ConstructDependenciesFromTree</c>.</param>
 /// <returns>
 /// The annotated sentence, or null when <paramref name="parseTree"/> is null or when, after
 /// <c>UpdateNode</c>, the node of some pair has a parent with exactly one child.
 /// </returns>
 public AnnotatedSentence.AnnotatedSentence Convert(ParseTreeDrawable parseTree, List <TreeEnsembleModel> models)
 {
     if (parseTree == null)
     {
         return null;
     }

     var sentence = new AnnotatedSentence.AnnotatedSentence();
     var leaves = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsLeafNode()).Collect();
     var pairs = new List <WordNodePair>();

     for (var position = 0; position < leaves.Count; position++)
     {
         // Word indices handed to WordNodePair are 1-based.
         var pair = new WordNodePair(leaves[position], position + 1);
         pair.UpdateNode();

         var parent = pair.GetNode().GetParent();
         if (parent != null && parent.NumberOfChildren() == 1)
         {
             // Still a single-child parent after the first update: update once more, report, give up.
             pair.UpdateNode();
             Console.WriteLine("check this");
             return null;
         }

         sentence.AddWord(pair.GetWord());
         pairs.Add(pair);
     }

     ConstructDependenciesFromTree(pairs, models);
     return sentence;
 }
        /// <summary>
        /// Collects, starting from <c>root</c>, the nodes accepted by an <c>IsPredicateVerbNode</c>
        /// condition built from the given WordNet.
        /// </summary>
        /// <param name="wordNet">WordNet handed to the <c>IsPredicateVerbNode</c> condition.</param>
        /// <returns>The list produced by the collector.</returns>
        public List <ParseNodeDrawable> ExtractNodesWithPredicateVerbs(WordNet.WordNet wordNet)
        {
            var startNode = (ParseNodeDrawable)root;
            var condition = new IsPredicateVerbNode(wordNet);
            var collector = new NodeDrawableCollector(startNode, condition);

            return collector.Collect();
        }
        /// <summary>
        /// Fills the PROPBANK layer of the nodes selected by <c>IsTransferable(secondLanguage)</c> and then
        /// saves the tree. Nodes whose PROPBANK layer data is already non-null are left alone.
        /// </summary>
        /// <remarks>
        /// For each remaining node every <c>ArgumentType</c> value is tried in enumeration order; each type
        /// that the frameset contains and <c>AutoDetectArgument</c> accepts is written to the layer, so a
        /// later match overwrites an earlier one. A node whose <c>secondLanguage</c> layer data is
        /// punctuation ends up with "NONE".
        /// </remarks>
        /// <param name="parseTree">Tree to annotate; it is saved at the end.</param>
        /// <param name="frameset">Frameset that restricts which argument types are considered.</param>
        public void autoArgument(ParseTreeDrawable parseTree, Frameset frameset)
        {
            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            List <ParseNodeDrawable> candidates =
                new NodeDrawableCollector(startNode, new IsTransferable(secondLanguage)).Collect();

            foreach (var candidate in candidates)
            {
                if (candidate.GetLayerData(ViewLayerType.PROPBANK) != null)
                {
                    continue;
                }

                foreach (ArgumentType kind in Enum.GetValues(typeof(ArgumentType)))
                {
                    if (!frameset.ContainsArgument(kind))
                    {
                        continue;
                    }

                    if (AutoDetectArgument(candidate, kind))
                    {
                        var propbankTag = ArgumentTypeStatic.GetPropbankType(kind);
                        candidate.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, propbankTag);
                    }
                }

                var isPunctuation = Word.IsPunctuation(candidate.GetLayerData(secondLanguage));
                if (isPunctuation)
                {
                    candidate.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, "NONE");
                }
            }

            parseTree.Save();
        }
Пример #6
0
        /// <summary>
        /// Labels the SEMANTICS layer of Turkish leaves whose words each have exactly one candidate synset.
        /// </summary>
        /// <remarks>
        /// Only leaves with non-null INFLECTIONAL_GROUP data are considered, and only those made of one, two
        /// or three words are labelled. The written value is the synset ids of the words joined with "$".
        /// </remarks>
        /// <param name="parseTree">Tree whose leaves are updated in place.</param>
        /// <returns>True when at least one leaf was labelled, false otherwise.</returns>
        protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
        {
            var changed = false;
            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                var layers = leaf.GetLayerInfo();
                if (layers.GetLayerData(ViewLayerType.INFLECTIONAL_GROUP) == null)
                {
                    continue;
                }

                var wordCount = layers.GetNumberOfWords();
                var candidates = new List <SynSet> [wordCount];
                for (var w = 0; w < wordCount; w++)
                {
                    var parse = layers.GetMorphologicalParseAt(w);
                    candidates[w] = _turkishWordNet.ConstructSynSets(
                        parse.GetWord().GetName(), parse, layers.GetMetamorphicParseAt(w), _fsm);
                }

                // Leaves with zero or more than three words are never labelled.
                if (wordCount < 1 || wordCount > 3)
                {
                    continue;
                }

                // Stop at the first ambiguous word; later entries are not inspected.
                var unambiguous = true;
                for (var w = 0; w < wordCount && unambiguous; w++)
                {
                    unambiguous = candidates[w].Count == 1;
                }

                if (!unambiguous)
                {
                    continue;
                }

                string semantics = candidates[0][0].GetId();
                for (var w = 1; w < wordCount; w++)
                {
                    semantics = semantics + "$" + candidates[w][0].GetId();
                }

                changed = true;
                leaf.GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, semantics);
            }

            return changed;
        }
Пример #7
0
        /// <summary>
        /// Checks every Turkish leaf that has no NER layer against <c>locationGazetteer</c>, using the
        /// leaf's word lower-cased with the "tr" culture.
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are inspected.</param>
        protected override void AutoDetectLocation(ParseTreeDrawable parseTree)
        {
            ParseNodeDrawable rootNode = (ParseNodeDrawable)parseTree.GetRoot();
            List <ParseNodeDrawable> leaves = new NodeDrawableCollector(rootNode, new IsTurkishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                if (leaf.LayerExists(ViewLayerType.NER))
                {
                    continue;
                }

                String lowered = leaf.GetLayerData(ViewLayerType.TURKISH_WORD).ToLower(new CultureInfo("tr"));
                leaf.CheckGazetteer(locationGazetteer, lowered);
            }
        }
        /// <summary>
        /// Collects the nodes under <c>root</c> accepted by <c>IsVPNode</c> and calls <c>ExtractVerbal</c>
        /// on them in order, stopping after the first call that returns true.
        /// </summary>
        public void ExtractVerbal()
        {
            var startNode = (ParseNodeDrawable)root;
            var verbPhrases = new NodeDrawableCollector(startNode, new IsVPNode()).Collect();

            for (var index = 0; index < verbPhrases.Count; index++)
            {
                if (verbPhrases[index].ExtractVerbal())
                {
                    break;
                }
            }
        }
        /// <summary>
        /// Gives every Turkish leaf that has at least one candidate synset a randomly chosen one as its
        /// SEMANTICS layer value. The generator is seeded with 1, so the sequence of choices is repeatable.
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are updated in place.</param>
        /// <returns>Always true.</returns>
        protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
        {
            var generator = new Random(1);
            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            for (var position = 0; position < leaves.Count; position++)
            {
                var candidates = GetCandidateSynSets(_turkishWordNet, _fsm, leaves, position);
                if (candidates.Count <= 0)
                {
                    continue;
                }

                var chosen = candidates[generator.Next(candidates.Count)];
                leaves[position].GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, chosen.GetId());
            }

            return true;
        }
        /// <summary>
        /// Finds the Turkish leaf that comes directly before the given node in the leaf list collected
        /// from <c>root</c>.
        /// </summary>
        /// <param name="parseNode">Node to look up among the leaves.</param>
        /// <returns>
        /// The preceding leaf, or null when the node is the first leaf or is not found in the list.
        /// </returns>
        public ParseNodeDrawable PreviousLeafNode(ParseNodeDrawable parseNode)
        {
            var startNode = (ParseNodeDrawable)root;
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            // Start at 1: the first leaf has no predecessor, so it is never compared.
            var position = 1;
            while (position < leaves.Count)
            {
                if (leaves[position].Equals(parseNode))
                {
                    return leaves[position - 1];
                }

                position++;
            }

            return null;
        }
        /// <summary>
        /// Builds an annotated sentence with one word per node accepted by <c>IsEnglishLeafNode</c> under
        /// <c>root</c>. Each word is created from the text
        /// <c>{language=NAME}{posTag=PARENT}</c>, where NAME is the name of the leaf's data and PARENT is
        /// the name of its parent's data.
        /// </summary>
        /// <param name="language">Key written in front of the leaf name in each word's text.</param>
        /// <returns>The sentence holding the generated words.</returns>
        public AnnotatedSentence.AnnotatedSentence GenerateAnnotatedSentence(string language)
        {
            var result = new AnnotatedSentence.AnnotatedSentence("");
            var startNode = (ParseNodeDrawable)root;
            var leaves = new NodeDrawableCollector(startNode, new IsEnglishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                var wordName = leaf.GetData().GetName();
                var tagName = leaf.GetParent().GetData().GetName();
                var annotation = "{" + language + "=" + wordName + "}{posTag=" + tagName + "}";
                result.AddWord(new AnnotatedWord(annotation));
            }

            return result;
        }
Пример #12
0
        /// <summary>
        /// Marks person names in the NER layer of the given tree. For each Turkish leaf without an NER
        /// layer, the word is lower-cased with the "tr" culture. If it is an honorific and the leaf's parent
        /// is named "NNP", the leaf is labelled "PERSON". The leaf is then checked against
        /// <c>personGazetteer</c> in either case.
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are inspected and updated in place.</param>
        protected override void AutoDetectPerson(ParseTreeDrawable parseTree)
        {
            ParseNodeDrawable rootNode = (ParseNodeDrawable)parseTree.GetRoot();
            List <ParseNodeDrawable> leaves = new NodeDrawableCollector(rootNode, new IsTurkishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                if (leaf.LayerExists(ViewLayerType.NER))
                {
                    continue;
                }

                String lowered = leaf.GetLayerData(ViewLayerType.TURKISH_WORD).ToLower(new CultureInfo("tr"));

                if (Word.IsHonorific(lowered))
                {
                    if (leaf.GetParent().GetData().GetName().Equals("NNP"))
                    {
                        leaf.GetLayerInfo().SetLayerData(ViewLayerType.NER, "PERSON");
                    }
                }

                leaf.CheckGazetteer(personGazetteer, lowered);
            }
        }
        /// <summary>
        /// Builds an annotated sentence from the Turkish leaves under <c>root</c>, adding one annotated
        /// word for every word index reported by each leaf's layer info.
        /// </summary>
        /// <returns>The sentence holding the generated words, in leaf order.</returns>
        public AnnotatedSentence.AnnotatedSentence GenerateAnnotatedSentence()
        {
            var result = new AnnotatedSentence.AnnotatedSentence("");
            var startNode = (ParseNodeDrawable)root;
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                var layerInfo = leaf.GetLayerInfo();
                var wordIndex = 0;
                while (wordIndex < layerInfo.GetNumberOfWords())
                {
                    var word = layerInfo.ToAnnotatedWord(wordIndex);
                    result.AddWord(word);
                    wordIndex++;
                }
            }

            return result;
        }
Пример #14
0
        /// <summary>
        /// Runs the person, location, organization, money and time detectors on the tree, in that order.
        /// Every node accepted by <c>IsTransferable(secondLanguage)</c> that still has no NER layer is then
        /// labelled "NONE", and the tree is saved.
        /// </summary>
        /// <param name="parseTree">Tree to annotate; it is saved at the end.</param>
        public void AutoNer(ParseTreeDrawable parseTree)
        {
            AutoDetectPerson(parseTree);
            AutoDetectLocation(parseTree);
            AutoDetectOrganization(parseTree);
            AutoDetectMoney(parseTree);
            AutoDetectTime(parseTree);

            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            var transferable = new NodeDrawableCollector(startNode, new IsTransferable(secondLanguage)).Collect();

            foreach (var node in transferable)
            {
                if (node.LayerExists(ViewLayerType.NER))
                {
                    continue;
                }

                // Nothing was detected for this node, so record that explicitly.
                node.GetLayerInfo().SetLayerData(ViewLayerType.NER, "NONE");
            }

            parseTree.Save();
        }
        /// <summary>
        /// Rewrites the PROPBANK layer of every Turkish leaf under <c>root</c> whose argument id equals
        /// <paramref name="previousId"/>. The new value is the argument type, then "$", then
        /// <paramref name="currentId"/>.
        /// </summary>
        /// <param name="previousId">Id to look for in the leaves' arguments.</param>
        /// <param name="currentId">Id written in its place.</param>
        /// <returns>True when at least one leaf was rewritten, false otherwise.</returns>
        public bool UpdateConnectedPredicate(string previousId, string currentId)
        {
            var changed = false;
            var startNode = (ParseNodeDrawable)root;
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            foreach (var leaf in leaves)
            {
                var argument = leaf.GetLayerInfo().GetArgument();
                if (argument == null)
                {
                    continue;
                }

                var argumentId = argument.GetId();
                if (argumentId == null || !argumentId.Equals(previousId))
                {
                    continue;
                }

                var rewritten = argument.GetArgumentType() + "$" + currentId;
                leaf.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, rewritten);
                changed = true;
            }

            return changed;
        }
Пример #16
0
        /// <summary>
        /// For every Turkish leaf that has candidate synsets, asks <c>MostFrequent</c> to pick one for the
        /// name of the word in the leaf's first morphological parse. A non-null pick is written to the
        /// SEMANTICS layer.
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are updated in place.</param>
        /// <returns>Always true.</returns>
        protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
        {
            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();

            for (int position = 0; position < leaves.Count; position++)
            {
                var candidates = GetCandidateSynSets(_turkishWordNet, _fsm, leaves, position);
                if (candidates.Count <= 0)
                {
                    continue;
                }

                var rootWord = leaves[position].GetLayerInfo().GetMorphologicalParseAt(0).GetWord().GetName();
                var winner = MostFrequent(candidates, rootWord);
                if (winner == null)
                {
                    continue;
                }

                leaves[position].GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, winner.GetId());
            }

            return true;
        }
Пример #17
0
        /// <summary>
        /// For every Turkish leaf, scores each candidate synset with <c>Intersection</c> against the full
        /// leaf list and writes one of the top-scoring synsets to the SEMANTICS layer. Ties are broken with
        /// a generator seeded with 1, so the choices are repeatable.
        /// </summary>
        /// <param name="parseTree">Tree whose leaves are updated in place.</param>
        /// <returns>True when at least one leaf was labelled, false otherwise.</returns>
        protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
        {
            var generator = new Random(1);
            var startNode = (ParseNodeDrawable)parseTree.GetRoot();
            var leaves = new NodeDrawableCollector(startNode, new IsTurkishLeafNode()).Collect();
            var labelledAny = false;

            for (var position = 0; position < leaves.Count; position++)
            {
                var candidates = GetCandidateSynSets(_turkishWordNet, _fsm, leaves, position);

                // First pass: find the highest score among the candidates.
                var bestScore = -1;
                foreach (var candidate in candidates)
                {
                    var score = Intersection(candidate, leaves);
                    if (score > bestScore)
                    {
                        bestScore = score;
                    }
                }

                // Second pass: keep every candidate that reaches that score, in original order.
                var winners = new List <SynSet>();
                foreach (var candidate in candidates)
                {
                    if (Intersection(candidate, leaves) == bestScore)
                    {
                        winners.Add(candidate);
                    }
                }

                if (winners.Count <= 0)
                {
                    continue;
                }

                var chosen = winners[generator.Next(winners.Count)];
                leaves[position].GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, chosen.GetId());
                labelledAny = true;
            }

            return labelledAny;
        }
        /// <summary>
        /// Writes B/I chunk tags into the SHALLOW_PARSE layer of every Turkish leaf collected under this
        /// node. The first word overall gets the prefix "B" and every later word gets "I". A leaf with
        /// several words receives the tags of its words separated by single spaces.
        /// </summary>
        /// <remarks>
        /// The suffix appended after "B"/"I" depends on <paramref name="chunkType"/>:
        /// <list type="bullet">
        /// <item><description>EXISTS: no suffix.</description></item>
        /// <item><description>NORMAL: "-" plus the label cut at its first '-'.</description></item>
        /// <item><description>DETAILED: "-" plus the label with a trailing "-digits" or "=digits" index
        /// removed and, if it still contains '-', truncated three characters after the first '-' (or at the
        /// end of the label when it is shorter).</description></item>
        /// </list>
        /// A label contained in <c>SentenceLabels</c> is treated as "S" before any of the above.
        /// </remarks>
        /// <param name="chunkType">Selects how much of the label is kept in the tag.</param>
        /// <param name="label">Label of the phrase the leaves belong to.</param>
        private void SetShallowParseLayer(ChunkType chunkType, string label)
        {
            var startWord             = true;
            var nodeLabel             = "";
            var nodeDrawableCollector = new NodeDrawableCollector(this, new IsTurkishLeafNode());

            var leafList = nodeDrawableCollector.Collect();

            if (SentenceLabels.Contains(label))
            {
                // Every sentence-level label collapses to plain "S".
                label = "S";
            }
            switch (chunkType)
            {
            case ChunkType.EXISTS:
                label = "";
                break;

            case ChunkType.NORMAL:
                // string.Replace matches literal text only, so the pattern has to go through Regex:
                // drop everything from the first '-' onwards.
                label = System.Text.RegularExpressions.Regex.Replace(label, "-.*", "");
                label = "-" + label;
                break;

            case ChunkType.DETAILED:
                // Strip a trailing numeric index such as "-1" or "=2" (needs Regex, see NORMAL).
                label = System.Text.RegularExpressions.Regex.Replace(label, "[-=][0-9]+$", "");
                var dashIndex = label.IndexOf('-');
                if (dashIndex >= 0)
                {
                    // Keep the first '-' and up to three characters after it. The length is clamped so a
                    // short suffix does not make Substring throw.
                    label = label.Substring(0, System.Math.Min(label.Length, dashIndex + 4));
                }

                label = "-" + label;
                break;
            }

            foreach (var node in leafList)
            {
                var layersInNode = node.GetLayerInfo();
                for (var i = 0; i < layersInNode.GetNumberOfWords(); i++)
                {
                    string wordLabel;
                    if (startWord)
                    {
                        // Only the very first word of the whole chunk begins it.
                        wordLabel = "B" + label;
                        startWord = false;
                    }
                    else
                    {
                        wordLabel = "I" + label;
                    }

                    if (i == 0)
                    {
                        nodeLabel = wordLabel;
                    }
                    else
                    {
                        nodeLabel = nodeLabel + " " + wordLabel;
                    }
                }

                node.GetLayerInfo().SetLayerData(ViewLayerType.SHALLOW_PARSE, nodeLabel);
            }
        }