/// <summary>
/// Tags money expressions in the NER layer of the Turkish leaves of <paramref name="parseTree"/>.
/// A leaf that has no NER layer and whose lower-cased (Turkish culture) word satisfies
/// <c>Word.IsMoney</c> is labelled "MONEY"; the leaves directly before it are labelled
/// "MONEY" too, for as long as their parent node is named "CD".
/// </summary>
/// <param name="parseTree">Tree whose leaves are inspected and updated in place.</param>
protected override void AutoDetectMoney(ParseTreeDrawable parseTree)
{
    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode());
    List<ParseNodeDrawable> leaves = collector.Collect();
    for (var index = 0; index < leaves.Count; index++)
    {
        var leaf = leaves[index];
        if (leaf.LayerExists(ViewLayerType.NER))
        {
            continue;
        }

        var lowered = leaf.GetLayerData(ViewLayerType.TURKISH_WORD).ToLower(new CultureInfo("tr"));
        if (!Word.IsMoney(lowered))
        {
            continue;
        }

        leaf.GetLayerInfo().SetLayerData(ViewLayerType.NER, "MONEY");

        // Walk backwards over the run of preceding leaves whose parent is named "CD".
        for (var back = index - 1; back >= 0; back--)
        {
            var earlier = leaves[back];
            if (!earlier.GetParent().GetData().GetName().Equals("CD"))
            {
                break;
            }

            earlier.GetLayerInfo().SetLayerData(ViewLayerType.NER, "MONEY");
        }
    }
}
/// <summary>
/// Runs <c>MostFrequentTreeAutoSemantic</c> over every tree loaded from "../../../new-trees/" and
/// compares the SEMANTICS layer of each Turkish leaf with the leaf at the same position in the
/// same-indexed tree loaded from "../../../old-trees/". Asserts the number of leaves visited and
/// the number of matching labels.
/// </summary>
public void TestAccuracy()
{
    var annotator = new MostFrequentTreeAutoSemantic(wordNet, fsm);
    var labelledBank = new TreeBankDrawable("../../../new-trees/");
    var referenceBank = new TreeBankDrawable("../../../old-trees/");
    var total = 0;
    var correct = 0;
    for (var treeIndex = 0; treeIndex < labelledBank.Size(); treeIndex++)
    {
        var labelledTree = labelledBank.Get(treeIndex);
        var referenceTree = referenceBank.Get(treeIndex);
        annotator.AutoSemantic(labelledTree);

        var labelledLeaves =
            new NodeDrawableCollector((ParseNodeDrawable)labelledTree.GetRoot(), new IsTurkishLeafNode()).Collect();
        var referenceLeaves =
            new NodeDrawableCollector((ParseNodeDrawable)referenceTree.GetRoot(), new IsTurkishLeafNode()).Collect();

        for (var leafIndex = 0; leafIndex < labelledLeaves.Count; leafIndex++)
        {
            total++;
            var labelledLeaf = labelledLeaves[leafIndex];
            var referenceLeaf = referenceLeaves[leafIndex];

            // A leaf counts as correct only when it received a label and that label equals the reference one.
            var predicted = labelledLeaf.GetLayerData(ViewLayerType.SEMANTICS);
            if (predicted == null)
            {
                continue;
            }

            if (predicted.Equals(referenceLeaf.GetLayerData(ViewLayerType.SEMANTICS)))
            {
                correct++;
            }
        }
    }

    Assert.AreEqual(475, total);
    Assert.AreEqual(260, correct);
}
/// <summary>
/// Builds an annotated sentence from the leaves of <paramref name="parseTree"/>, one word per leaf,
/// and then calls <c>ConstructDependenciesFromTree</c> with the collected word/node pairs.
/// </summary>
/// <param name="parseTree">Tree to convert; when null the method returns null.</param>
/// <param name="models">Models handed unchanged to <c>ConstructDependenciesFromTree</c>.</param>
/// <returns>
/// The annotated sentence, or null when <paramref name="parseTree"/> is null or when, after
/// <c>UpdateNode</c>, a pair's node still has a parent with exactly one child (in that case
/// "check this" is written to the console first).
/// </returns>
public AnnotatedSentence.AnnotatedSentence Convert(ParseTreeDrawable parseTree, List<TreeEnsembleModel> models)
{
    if (parseTree == null)
    {
        return null;
    }

    var sentence = new AnnotatedSentence.AnnotatedSentence();
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsLeafNode()).Collect();
    var pairs = new List<WordNodePair>();

    // Pairs are numbered from 1 in leaf order.
    for (var ordinal = 1; ordinal <= leaves.Count; ordinal++)
    {
        var pair = new WordNodePair(leaves[ordinal - 1], ordinal);
        pair.UpdateNode();

        var parent = pair.GetNode().GetParent();
        if (parent != null && parent.NumberOfChildren() == 1)
        {
            pair.UpdateNode();
            Console.WriteLine("check this");
            return null;
        }

        sentence.AddWord(pair.GetWord());
        pairs.Add(pair);
    }

    ConstructDependenciesFromTree(pairs, models);
    return sentence;
}
/// <summary>
/// Collects, starting from the root, the nodes accepted by an <c>IsPredicateVerbNode</c>
/// condition built from <paramref name="wordNet"/>.
/// </summary>
/// <param name="wordNet">WordNet passed to the <c>IsPredicateVerbNode</c> condition.</param>
/// <returns>The list produced by the collector.</returns>
public List<ParseNodeDrawable> ExtractNodesWithPredicateVerbs(WordNet.WordNet wordNet)
{
    var start = (ParseNodeDrawable)root;
    var condition = new IsPredicateVerbNode(wordNet);
    var collector = new NodeDrawableCollector(start, condition);
    return collector.Collect();
}
/// <summary>
/// Fills the PROPBANK layer of the nodes of <paramref name="parseTree"/> accepted by
/// <c>IsTransferable(secondLanguage)</c> that do not have PROPBANK data yet, then saves the tree.
/// Every argument type that the frameset contains and that <c>AutoDetectArgument</c> accepts for the
/// node is written in enumeration order (a later match overwrites an earlier one); punctuation is
/// finally labelled "NONE".
/// </summary>
/// <param name="parseTree">Tree to annotate and save.</param>
/// <param name="frameset">Frameset consulted for the argument types it contains.</param>
public void autoArgument(ParseTreeDrawable parseTree, Frameset frameset)
{
    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTransferable(secondLanguage));
    List<ParseNodeDrawable> nodes = collector.Collect();
    for (var index = 0; index < nodes.Count; index++)
    {
        var node = nodes[index];
        if (node.GetLayerData(ViewLayerType.PROPBANK) != null)
        {
            continue;
        }

        foreach (ArgumentType candidate in Enum.GetValues(typeof(ArgumentType)))
        {
            if (!frameset.ContainsArgument(candidate))
            {
                continue;
            }

            if (AutoDetectArgument(node, candidate))
            {
                node.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, ArgumentTypeStatic.GetPropbankType(candidate));
            }
        }

        // Punctuation overrides whatever was assigned above.
        if (Word.IsPunctuation(node.GetLayerData(secondLanguage)))
        {
            node.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, "NONE");
        }
    }

    parseTree.Save();
}
/// <summary>
/// Labels the SEMANTICS layer of Turkish leaves whose words are unambiguous. For a leaf with
/// INFLECTIONAL_GROUP data, a synset list is constructed for every word; when the leaf has one,
/// two or three words and each list holds exactly one synset, the synset ids are written to the
/// SEMANTICS layer joined by "$".
/// </summary>
/// <param name="parseTree">Tree whose leaves are labelled in place.</param>
/// <returns>True when at least one leaf was labelled; otherwise false.</returns>
protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
{
    var changed = false;
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode()).Collect();
    foreach (var leaf in leaves)
    {
        var layerInfo = leaf.GetLayerInfo();
        if (layerInfo.GetLayerData(ViewLayerType.INFLECTIONAL_GROUP) == null)
        {
            continue;
        }

        var wordCount = layerInfo.GetNumberOfWords();
        var candidates = new List<SynSet>[wordCount];
        for (var w = 0; w < wordCount; w++)
        {
            var morphologicalParse = layerInfo.GetMorphologicalParseAt(w);
            candidates[w] = _turkishWordNet.ConstructSynSets(
                morphologicalParse.GetWord().GetName(),
                morphologicalParse,
                layerInfo.GetMetamorphicParseAt(w),
                _fsm);
        }

        // Only leaves with one to three words are labelled.
        if (wordCount < 1 || wordCount > 3)
        {
            continue;
        }

        // Every word must have exactly one candidate synset; stop at the first that does not.
        var unambiguous = true;
        for (var w = 0; w < wordCount && unambiguous; w++)
        {
            unambiguous = candidates[w].Count == 1;
        }

        if (!unambiguous)
        {
            continue;
        }

        var semantics = candidates[0][0].GetId();
        for (var w = 1; w < wordCount; w++)
        {
            semantics = semantics + "$" + candidates[w][0].GetId();
        }

        changed = true;
        leaf.GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, semantics);
    }

    return changed;
}
/// <summary>
/// For every Turkish leaf of <paramref name="parseTree"/> that has no NER layer, lower-cases its
/// Turkish word with the "tr" culture and passes it to <c>CheckGazetteer</c> together with
/// <c>locationGazetteer</c>.
/// </summary>
/// <param name="parseTree">Tree whose leaves are checked.</param>
protected override void AutoDetectLocation(ParseTreeDrawable parseTree)
{
    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode());
    var leaves = collector.Collect();
    for (var index = 0; index < leaves.Count; index++)
    {
        var leaf = leaves[index];
        if (leaf.LayerExists(ViewLayerType.NER))
        {
            // Already carries an NER layer; leave it alone.
            continue;
        }

        var turkishWord = leaf.GetLayerData(ViewLayerType.TURKISH_WORD);
        var lowered = turkishWord.ToLower(new CultureInfo("tr"));
        leaf.CheckGazetteer(locationGazetteer, lowered);
    }
}
/// <summary>
/// Collects the nodes accepted by <c>IsVPNode</c> starting from the root and calls
/// <c>ExtractVerbal</c> on them in order, stopping after the first call that returns true.
/// </summary>
public void ExtractVerbal()
{
    var vpNodes = new NodeDrawableCollector((ParseNodeDrawable)root, new IsVPNode()).Collect();
    var index = 0;

    // The loop body is empty on purpose: the call in the condition does the work.
    while (index < vpNodes.Count && !vpNodes[index].ExtractVerbal())
    {
        index++;
    }
}
/// <summary>
/// Assigns to each Turkish leaf that has candidate synsets the id of one candidate picked with a
/// <see cref="Random"/> seeded with 1, writing it to the SEMANTICS layer.
/// </summary>
/// <param name="parseTree">Tree whose leaves are labelled in place.</param>
/// <returns>Always true.</returns>
protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
{
    // Fixed seed: the sequence of picks is the same on every call.
    var generator = new Random(1);
    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode());
    var leaves = collector.Collect();
    for (var position = 0; position < leaves.Count; position++)
    {
        var candidates = GetCandidateSynSets(_turkishWordNet, _fsm, leaves, position);
        if (candidates.Count <= 0)
        {
            continue;
        }

        var chosen = candidates[generator.Next(candidates.Count)];
        leaves[position].GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, chosen.GetId());
    }

    return true;
}
/// <summary>
/// Finds the Turkish leaf that comes immediately before <paramref name="parseNode"/> in the
/// collected leaf order.
/// </summary>
/// <param name="parseNode">Leaf whose predecessor is wanted.</param>
/// <returns>
/// The preceding leaf, or null when <paramref name="parseNode"/> is the first leaf or does not
/// equal any leaf after the first.
/// </returns>
public ParseNodeDrawable PreviousLeafNode(ParseNodeDrawable parseNode)
{
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)root, new IsTurkishLeafNode()).Collect();

    // 'before' indexes the candidate predecessor; the leaf compared is the one right after it.
    for (var before = 0; before + 1 < leaves.Count; before++)
    {
        if (leaves[before + 1].Equals(parseNode))
        {
            return leaves[before];
        }
    }

    return null;
}
/// <summary>
/// Builds an annotated sentence with one word per node accepted by <c>IsEnglishLeafNode</c>. Each
/// word is created from the text <c>{language=NAME}{posTag=PARENT}</c>, where NAME is the node's
/// data name and PARENT is its parent's data name.
/// </summary>
/// <param name="language">Layer name placed before the "=" in the first brace group.</param>
/// <returns>The sentence containing the generated words in leaf order.</returns>
public AnnotatedSentence.AnnotatedSentence GenerateAnnotatedSentence(string language)
{
    var result = new AnnotatedSentence.AnnotatedSentence("");
    var collector = new NodeDrawableCollector((ParseNodeDrawable)root, new IsEnglishLeafNode());
    var leaves = collector.Collect();
    for (var index = 0; index < leaves.Count; index++)
    {
        var leaf = leaves[index];
        var surface = leaf.GetData().GetName();
        var posTag = leaf.GetParent().GetData().GetName();
        var layers = "{" + language + "=" + surface + "}{posTag=" + posTag + "}";
        result.AddWord(new AnnotatedWord(layers));
    }

    return result;
}
/// <summary>
/// Processes every Turkish leaf of <paramref name="parseTree"/> that has no NER layer: a leaf whose
/// lower-cased (Turkish culture) word satisfies <c>Word.IsHonorific</c> and whose parent is named
/// "NNP" is labelled "PERSON"; the word is then passed to <c>CheckGazetteer</c> with
/// <c>personGazetteer</c> in either case.
/// </summary>
/// <param name="parseTree">Tree whose leaves are inspected and updated in place.</param>
protected override void AutoDetectPerson(ParseTreeDrawable parseTree)
{
    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode());
    var leaves = collector.Collect();
    for (var index = 0; index < leaves.Count; index++)
    {
        var leaf = leaves[index];
        if (leaf.LayerExists(ViewLayerType.NER))
        {
            continue;
        }

        var lowered = leaf.GetLayerData(ViewLayerType.TURKISH_WORD).ToLower(new CultureInfo("tr"));
        var honorificProperNoun = Word.IsHonorific(lowered) && leaf.GetParent().GetData().GetName().Equals("NNP");
        if (honorificProperNoun)
        {
            leaf.GetLayerInfo().SetLayerData(ViewLayerType.NER, "PERSON");
        }

        // The gazetteer check runs whether or not the honorific rule fired.
        leaf.CheckGazetteer(personGazetteer, lowered);
    }
}
/// <summary>
/// Builds an annotated sentence from the Turkish leaves: for each leaf, every word position of its
/// layer info is converted with <c>ToAnnotatedWord</c> and appended in order.
/// </summary>
/// <returns>The sentence containing all converted words.</returns>
public AnnotatedSentence.AnnotatedSentence GenerateAnnotatedSentence()
{
    var result = new AnnotatedSentence.AnnotatedSentence("");
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)root, new IsTurkishLeafNode()).Collect();
    for (var leafIndex = 0; leafIndex < leaves.Count; leafIndex++)
    {
        var layerInfo = leaves[leafIndex].GetLayerInfo();
        var wordIndex = 0;
        while (wordIndex < layerInfo.GetNumberOfWords())
        {
            result.AddWord(layerInfo.ToAnnotatedWord(wordIndex));
            wordIndex++;
        }
    }

    return result;
}
/// <summary>
/// Runs the person, location, organization, money and time detectors on
/// <paramref name="parseTree"/> in that order, labels every node accepted by
/// <c>IsTransferable(secondLanguage)</c> that still has no NER layer as "NONE", and saves the tree.
/// </summary>
/// <param name="parseTree">Tree to annotate and save.</param>
public void AutoNer(ParseTreeDrawable parseTree)
{
    AutoDetectPerson(parseTree);
    AutoDetectLocation(parseTree);
    AutoDetectOrganization(parseTree);
    AutoDetectMoney(parseTree);
    AutoDetectTime(parseTree);

    var collector = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTransferable(secondLanguage));
    var nodes = collector.Collect();
    for (var index = 0; index < nodes.Count; index++)
    {
        var node = nodes[index];
        if (node.LayerExists(ViewLayerType.NER))
        {
            continue;
        }

        // No detector labelled this node.
        node.GetLayerInfo().SetLayerData(ViewLayerType.NER, "NONE");
    }

    parseTree.Save();
}
/// <summary>
/// Rewrites the PROPBANK layer of every Turkish leaf whose argument id equals
/// <paramref name="previousId"/> to "ARGUMENTTYPE$<paramref name="currentId"/>", keeping the leaf's
/// existing argument type.
/// </summary>
/// <param name="previousId">Id currently stored in the arguments to be updated.</param>
/// <param name="currentId">Id written after the "$" separator.</param>
/// <returns>True when at least one leaf was updated; otherwise false.</returns>
public bool UpdateConnectedPredicate(string previousId, string currentId)
{
    var changed = false;
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)root, new IsTurkishLeafNode()).Collect();
    for (var index = 0; index < leaves.Count; index++)
    {
        var leaf = leaves[index];
        var argument = leaf.GetLayerInfo().GetArgument();

        // Skip leaves without an argument, without an argument id, or with a different id.
        if (argument == null || argument.GetId() == null || !argument.GetId().Equals(previousId))
        {
            continue;
        }

        leaf.GetLayerInfo().SetLayerData(ViewLayerType.PROPBANK, argument.GetArgumentType() + "$" + currentId);
        changed = true;
    }

    return changed;
}
/// <summary>
/// For each Turkish leaf that has candidate synsets, asks <c>MostFrequent</c> for the best one,
/// passing the name of the word of the leaf's first morphological parse, and writes its id to the
/// SEMANTICS layer when a non-null result comes back.
/// </summary>
/// <param name="parseTree">Tree whose leaves are labelled in place.</param>
/// <returns>Always true.</returns>
protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
{
    var leaves = new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode()).Collect();
    for (var position = 0; position < leaves.Count; position++)
    {
        var candidates = GetCandidateSynSets(_turkishWordNet, _fsm, leaves, position);
        if (candidates.Count <= 0)
        {
            continue;
        }

        var leaf = leaves[position];
        var root = leaf.GetLayerInfo().GetMorphologicalParseAt(0).GetWord().GetName();
        var winner = MostFrequent(candidates, root);
        if (winner == null)
        {
            continue;
        }

        leaf.GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS, winner.GetId());
    }

    return true;
}
/// <summary>
/// Labels the SEMANTICS layer of each Turkish leaf with one of its candidate synsets that has the
/// highest <c>Intersection</c> score against the leaf list. Ties are broken with a
/// <see cref="Random"/> seeded with 1.
/// </summary>
/// <param name="parseTree">Tree whose leaves are labelled in place.</param>
/// <returns>True when at least one leaf was labelled; otherwise false.</returns>
protected override bool AutoLabelSingleSemantics(ParseTreeDrawable parseTree)
{
    var random = new Random(1);
    var nodeDrawableCollector =
        new NodeDrawableCollector((ParseNodeDrawable)parseTree.GetRoot(), new IsTurkishLeafNode());
    var leafList = nodeDrawableCollector.Collect();
    var done = false;
    for (var i = 0; i < leafList.Count; i++)
    {
        var synSets = GetCandidateSynSets(_turkishWordNet, _fsm, leafList, i);

        // Score every candidate once and remember the score, so the selection pass below
        // does not have to call Intersection a second time for the same synset.
        var scores = new int[synSets.Count];
        var maxIntersection = -1;
        for (var j = 0; j < synSets.Count; j++)
        {
            scores[j] = Intersection(synSets[j], leafList);
            if (scores[j] > maxIntersection)
            {
                maxIntersection = scores[j];
            }
        }

        // Keep the candidates that reach the maximum, in their original order.
        var maxSynSets = new List<SynSet>();
        for (var j = 0; j < synSets.Count; j++)
        {
            if (scores[j] == maxIntersection)
            {
                maxSynSets.Add(synSets[j]);
            }
        }

        if (maxSynSets.Count > 0)
        {
            leafList[i].GetLayerInfo().SetLayerData(ViewLayerType.SEMANTICS,
                maxSynSets[random.Next(maxSynSets.Count)].GetId());
            done = true;
        }
    }

    return done;
}
/// <summary>
/// Writes B/I chunk tags to the SHALLOW_PARSE layer of every Turkish leaf under this node. The
/// first word overall is tagged "B" + suffix and every later word "I" + suffix; the tags of a
/// leaf's words are joined with single spaces.
/// </summary>
/// <param name="chunkType">
/// Controls the suffix: EXISTS uses no suffix; NORMAL uses "-" plus the label cut at its first
/// dash; DETAILED uses "-" plus the label with a trailing "-digits"/"=digits" index removed and,
/// if a dash remains, truncated to at most three characters after the first dash.
/// </param>
/// <param name="label">Node label; replaced by "S" when <c>SentenceLabels</c> contains it.</param>
private void SetShallowParseLayer(ChunkType chunkType, string label)
{
    var startWord = true;
    var nodeLabel = "";
    var nodeDrawableCollector = new NodeDrawableCollector(this, new IsTurkishLeafNode());
    var leafList = nodeDrawableCollector.Collect();
    if (SentenceLabels.Contains(label))
    {
        label = "S";
    }

    switch (chunkType)
    {
        case ChunkType.EXISTS:
            label = "";
            break;
        case ChunkType.NORMAL:
            // string.Replace treats its argument literally, so the pattern has to go through Regex.
            label = System.Text.RegularExpressions.Regex.Replace(label, "-.*", "");
            label = "-" + label;
            break;
        case ChunkType.DETAILED:
            label = System.Text.RegularExpressions.Regex.Replace(label, "[-=](\\d)+$", "");
            if (label.Contains("-"))
            {
                // Keep the dash plus up to three following characters; clamp so that a
                // shorter suffix does not make Substring throw.
                var end = label.IndexOf('-') + 4;
                if (end > label.Length)
                {
                    end = label.Length;
                }

                label = label.Substring(0, end);
            }

            label = "-" + label;
            break;
    }

    foreach (var node in leafList)
    {
        var layersInNode = node.GetLayerInfo();
        for (var i = 0; i < layersInNode.GetNumberOfWords(); i++)
        {
            string wordLabel;
            if (startWord)
            {
                wordLabel = "B" + label;
                startWord = false;
            }
            else
            {
                wordLabel = "I" + label;
            }

            if (i == 0)
            {
                nodeLabel = wordLabel;
            }
            else
            {
                nodeLabel = nodeLabel + " " + wordLabel;
            }
        }

        node.GetLayerInfo().SetLayerData(ViewLayerType.SHALLOW_PARSE, nodeLabel);
    }
}