/// <summary>
/// Parses an English sentence with OpenNLP (model: en-parser-chunking.bin).
/// </summary>
/// <param name="sentence">The English sentence to parse.</param>
/// <returns>
/// The first parse returned by <c>ParserTool.parseLine</c> (a tree data structure), or
/// <c>null</c> when <paramref name="sentence"/> is null or empty, or when no parse is produced.
/// </returns>
public Parse Parser(string sentence)
{
    // A null sentence is treated like an empty one instead of raising NullReferenceException.
    if (string.IsNullOrEmpty(sentence))
    {
        return null;
    }

    // NOTE(review): presumably initialises the shared `parser` used below - confirm.
    LoadParser();

    // The third argument asks parseLine for a single parse.
    Parse[] topParses = ParserTool.parseLine(sentence, parser, 1);

    // Guard the index access: an empty result means there is nothing to return.
    if (topParses == null || topParses.Length == 0)
    {
        return null;
    }

    return topParses[0];
}
/// <summary>
/// Parses <paramref name="sentence"/> and returns the contents of <c>temp</c> after every
/// resulting parse has been passed to <c>recursiveTraversalTree</c>.
/// </summary>
/// <param name="sentence">The sentence to parse; must not be null.</param>
/// <returns>
/// <c>null</c> for an empty sentence; otherwise the contents of <c>temp</c> as an array.
/// </returns>
public string[] getS(string sentence)
{
    if (sentence.Length < 1)
    {
        return null;
    }

    // Start from a fresh list on every call.
    // NOTE(review): presumably recursiveTraversalTree appends to `temp` - confirm.
    temp = new List<string>();

    Parse[] parses = ParserTool.parseLine(sentence, parser, 1);
    for (int i = 0; i < parses.Length; i++)
    {
        recursiveTraversalTree(parses[i]);
    }

    return temp.ToArray();
}
/// <summary>
/// Parses <paramref name="sentence"/> and concatenates the result of <c>getPhrase</c> for
/// every parse that <c>ParserTool.parseLine</c> returns.
/// </summary>
/// <param name="sentence">The sentence to parse; must not be null.</param>
/// <returns>
/// An empty string for an empty sentence; otherwise the concatenated phrases.
/// </returns>
public string Parser(string sentence)
{
    if (sentence.Length < 1)
    {
        return "";
    }

    Parse[] parses = ParserTool.parseLine(sentence, parser, 1);

    string combined = String.Empty;
    for (int i = 0; i < parses.Length; i++)
    {
        combined = combined + getPhrase(parses[i]);
    }

    return combined;
}
/// <summary>
/// Analyses a natural-language request in two parsing passes and returns the words that
/// were matched, each mapped to its list of labels.
/// </summary>
/// <remarks>
/// <para>
/// Pass one parses <paramref name="line"/>, feeds every parse to <c>GetSentenceParts</c>,
/// labels the dates found by the OpenNLP date name finder, and hands the noun and verb
/// phrases to <c>FindMatchingNodesFromXml</c> together with the root node of
/// <c>file2.xml</c>.
/// </para>
/// <para>
/// The matched keys, the stop words in <c>stopWordsofwordnet</c>, and the characters
/// <c>.</c> and <c>-</c> are then stripped from the line, and the remainder goes through
/// the same steps a second time.
/// </para>
/// <para>
/// An <c>IOException</c> raised after the parser model has been loaded is caught and its
/// stack trace printed; the method then returns whatever has been matched so far.
/// Exceptions raised while opening the model files or loading the parser model propagate
/// to the caller. All three model streams are closed in either case.
/// </para>
/// </remarks>
/// <param name="line">The sentence to analyse.</param>
/// <returns>
/// The shared <c>matchedWords</c> dictionary. Date entries carry the labels
/// <c>START_PERIOD</c> and/or <c>END_PERIOD</c>.
/// </returns>
public Dictionary<string, List<string>> Main(string line)
{
    // Reset the state left over from a previous call.
    matchedWords?.Clear();
    nounPhrases?.Clear();
    nouns?.Clear();
    adjectivePhrases?.Clear();
    verbPhrases?.Clear();

    // Declared outside the try so that the finally block can close whichever streams
    // were actually opened, including the parser model stream.
    InputStream modelIn = null;
    InputStream modelIn1 = null;
    InputStream modelIn2 = null;

    try
    {
        modelIn = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-parser-chunking.bin");
        modelIn1 = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-ner-date.bin");
        modelIn2 = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-token.bin");

        ParserModel model = new ParserModel(modelIn);
        var myParser = ParserFactory.create(model);

        // ---- First pass: parse the sentence exactly as it was given. ----
        var topParses = ParserTool.parseLine(line, myParser, 1);
        foreach (var p in topParses)
        {
            GetSentenceParts(p);
        }

        try
        {
            TokenizerModel model1 = new TokenizerModel(modelIn2);
            TokenNameFinderModel model2 = new TokenNameFinderModel(modelIn1);
            Tokenizer tokenizer = new TokenizerME(model1);
            var nameFinder = new NameFinderME(model2);

            var tokens = tokenizer.tokenize(line);
            var nameSpans = nameFinder.find(tokens);
            var array = Span.spansToStrings(nameSpans, tokens);
            dates = new HashSet<string>(array);
            PrintSets();

            // TODO: presentation-type detection (table / bar chart / line chart) is not
            // implemented. If no option is found, ask the user to give one (planned for
            // the web version).

            System.Diagnostics.Debug.WriteLine("\nProcessing Dates");
            AddDatePeriodsToMatchedWords();

            System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");
            var doc = new XmlDocument();
            doc.Load(HttpRuntime.AppDomainAppPath + "\\file2.xml");
            var root = doc.SelectSingleNode("*");

            FindMatchingNodesFromXml(root, nounPhrases);
            RemoveNounsCoveredByMatches();
            FindMatchingNodesFromXml(root, verbPhrases);

            System.Diagnostics.Debug.WriteLine("\nProcessing verb phrases ");
            System.Diagnostics.Debug.WriteLine("\nProcessing nouns ");

            // Only written to the debug output below; WordNet matching is not implemented.
            var directory = Directory.GetCurrentDirectory() + "\\wordnet\\";

            DumpMatchedWords();

            // ---- Strip everything already matched, then the stop words and punctuation. ----
            // NOTE(review): contains/replace/toLowerCase are not System.String members;
            // presumably extension methods defined elsewhere in the project - confirm.
            foreach (var variable in matchedWords)
            {
                string a = variable.Key;
                if (line.Contains(a))
                {
                    line = line.replace(a, "");
                }
            }

            foreach (var variable in stopWordsofwordnet)
            {
                // Surrounding spaces restrict the match to whole words inside the sentence.
                string a = " " + variable.toLowerCase() + " ";
                if (line.Contains(a))
                {
                    line = line.replace(a, " ");
                }
            }

            if (line.contains("."))
            {
                line = line.replace(".", "");
            }

            if (line.contains("-"))
            {
                line = line.replace("-", " ");
            }

            System.Diagnostics.Debug.WriteLine("/////////////");
            System.Diagnostics.Debug.WriteLine("SECOND PARSE STRING " + line);
            System.Diagnostics.Debug.WriteLine("/////////////");

            // ---- Second pass: parse what is left of the sentence. ----
            line = line.Trim();
            topParses = ParserTool.parseLine(line, myParser, 1);

            nounPhrases?.Clear();
            dates?.Clear();
            verbPhrases?.Clear();
            nouns?.Clear();

            foreach (var p in topParses)
            {
                GetSentenceParts(p);
            }

            FindMatchingNodesFromXml(root, nounPhrases);
            RemoveNounsCoveredByMatches();
            FindMatchingNodesFromXml(root, verbPhrases);
            FindMatchingNodesFromXml(root, nouns);

            tokens = tokenizer.tokenize(line);
            nameSpans = nameFinder.find(tokens);
            array = Span.spansToStrings(nameSpans, tokens);
            dates = new HashSet<string>(array);
            PrintSets();

            System.Diagnostics.Debug.WriteLine("\nProcessing Dates");
            AddDatePeriodsToMatchedWords();

            System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");
            FindMatchingNodesFromXml(root, nounPhrases);
            FindMatchingNodesFromXml(root, verbPhrases);
            FindMatchingNodesFromXml(root, nouns);

            DumpMatchedWords();

            // TODO: matching with WordNet is not implemented; only the expected data
            // directory is traced.
            System.Diagnostics.Debug.WriteLine(directory);
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
    finally
    {
        CloseModelStream(modelIn);
        CloseModelStream(modelIn1);
        CloseModelStream(modelIn2);
    }

    return matchedWords;
}

/// <summary>
/// Labels the entries of <c>dates</c> in <c>matchedWords</c> as START_PERIOD / END_PERIOD.
/// Nothing is added unless <c>dates</c> holds exactly one or two entries.
/// </summary>
private void AddDatePeriodsToMatchedWords()
{
    if (dates.Count == 2)
    {
        // Two separate dates: the first is the start (minus any "from"), the second the end.
        var start = dates.ElementAt(0);
        if (start.contains("from"))
        {
            start = start.replace("from", "");
        }

        AddMatchedLabels(start, "START_PERIOD");
        AddMatchedLabels(dates.ElementAt(1), "END_PERIOD");
    }
    else if (dates.Count == 1)
    {
        var only = dates.ElementAt(0);
        if (only.contains("from"))
        {
            // A single span of the form "from X to Y": split it into start and end.
            var dts = only.replace("from", "").Split(new[] { " to " }, StringSplitOptions.None);
            if (dts.Length >= 2)
            {
                AddMatchedLabels(dts[0], "START_PERIOD");
                AddMatchedLabels(dts[1], "END_PERIOD");
            }
            else
            {
                // No " to " part: the one date is both start and end, rather than
                // indexing past the end of the split result.
                AddMatchedLabels(dts[0], "START_PERIOD", "END_PERIOD");
            }
        }
        else
        {
            AddMatchedLabels(only, "START_PERIOD", "END_PERIOD");
        }
    }
}

/// <summary>
/// Adds <paramref name="labels"/> for <paramref name="key"/> to <c>matchedWords</c>. When the
/// key is already present the labels are merged into its list, because
/// <c>Dictionary.Add</c> would throw on a repeated key (for example the same date twice).
/// </summary>
private void AddMatchedLabels(string key, params string[] labels)
{
    List<string> existing;
    if (matchedWords.TryGetValue(key, out existing) && existing != null)
    {
        foreach (var label in labels)
        {
            if (!existing.Contains(label))
            {
                existing.Add(label);
            }
        }

        return;
    }

    matchedWords[key] = new List<string>(labels);
}

/// <summary>
/// Removes from <c>nouns</c> every noun that occurs inside a key of <c>matchedWords</c>.
/// </summary>
private void RemoveNounsCoveredByMatches()
{
    // Enumerate a snapshot so that removing from `nouns` is safe during the loop.
    foreach (var item in nouns.ToList())
    {
        foreach (var matched in matchedWords)
        {
            if (matched.Key.Contains(item))
            {
                nouns.Remove(item);
                break; // one removal per snapshot entry is enough
            }
        }
    }
}

/// <summary>
/// Writes every entry of <c>matchedWords</c> (labels, then key) to the debug output.
/// </summary>
private void DumpMatchedWords()
{
    foreach (var variable in matchedWords)
    {
        // Join the labels; concatenating the list itself would print only its type name.
        var labels = variable.Value == null ? "" : string.Join(", ", variable.Value);
        System.Diagnostics.Debug.WriteLine(labels + "\t\t" + variable.Key);
    }
}

/// <summary>
/// Closes a model stream if it was opened. A failure to close is traced and otherwise
/// ignored so that it cannot mask the result or an exception already in flight.
/// </summary>
private void CloseModelStream(InputStream stream)
{
    if (stream == null)
    {
        return;
    }

    try
    {
        stream.close();
    }
    catch (IOException e)
    {
        System.Diagnostics.Debug.WriteLine("Failed to close model stream: " + e);
    }
}