/// <summary>
/// Adds to <paramref name="result"/> every word of <paramref name="input"/> whose
/// part-of-speech tag in <paramref name="pos"/> is "NN" or "NNS".
/// </summary>
/// <remarks>
/// Tags are numbered with a single running index across all rows of
/// <paramref name="pos"/>, in row order. The input is then passed through
/// <c>LanguageAnalyze.RemovePunctuation</c>, split on whitespace, and the word at
/// each recorded index is added. Recorded indices that fall past the end of the
/// word list are skipped.
/// NOTE(review): this assumes the flattened tag sequence lines up one-to-one with
/// the whitespace-separated words left after punctuation removal - confirm against
/// the tagger's tokenization.
/// </remarks>
/// <param name="input">Text the tags were produced for.</param>
/// <param name="pos">Jagged array of part-of-speech tags; rows may differ in length.</param>
/// <param name="result">Set that receives the matching words; existing entries are kept.</param>
public static void getNN_NNs(string input, string[][] pos, HashSet<string> result)
{
    List<int> nounPositions = new List<int>();
    int flatIndex = 0;

    for (int row = 0; row < pos.Length; row++)
    {
        // Bound by this row's own length: pos is jagged, so using pos[0].Length
        // would drop tags from longer rows and overrun shorter ones.
        for (int col = 0; col < pos[row].Length; col++)
        {
            string tag = pos[row][col];

            // '==' on strings is an ordinal value comparison and tolerates a null tag.
            if (tag == "NN" || tag == "NNS")
            {
                nounPositions.Add(flatIndex);
            }

            flatIndex++;
        }
    }

    string cleaned = LanguageAnalyze.RemovePunctuation(input);

    // A null separator array splits on any whitespace.
    string[] words = cleaned.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

    foreach (int position in nounPositions)
    {
        // The tag count can exceed the word count; ignore positions with no word.
        if (position < words.Length)
        {
            result.Add(words[position]);
        }
    }
}
/// <summary>
/// Builds the set of noun phrases and standalone nouns found in <paramref name="input"/>.
/// </summary>
/// <remarks>
/// Phrases are gathered by running <c>Extract</c> over every entry returned by
/// <c>LanguageAnalyze.analyzeDependancyTree</c>. Single nouns come from
/// <c>getNN_NNs</c> applied to the output of <c>LanguageAnalyze.analyzePOSTag</c>.
/// A single noun is kept only when <c>containWord</c> reports false for it against
/// the phrase set and <c>isStop</c> reports false; a phrase is kept when
/// <c>isStop</c> reports false. The literal entry "database" is always removed
/// from the final set.
/// </remarks>
/// <param name="input">Text to analyze.</param>
/// <returns>A new set holding the retained nouns and phrases.</returns>
public static HashSet<string> getNP(string input)
{
    // Phrases extracted from each dependency-tree entry.
    HashSet<string> phrases = new HashSet<string>();
    string[] trees = LanguageAnalyze.analyzeDependancyTree(input);
    for (int k = 0; k < trees.Length; k++)
    {
        Extract(trees[k], phrases);
    }

    // Single nouns picked out via the POS tags.
    Tuple<string, string[][]> tagged = LanguageAnalyze.analyzePOSTag(input);
    HashSet<string> singleNouns = new HashSet<string>();
    getNN_NNs(tagged.Item1, tagged.Item2, singleNouns);

    HashSet<string> result = new HashSet<string>();

    foreach (string noun in singleNouns)
    {
        if (containWord(phrases, noun) || isStop(noun))
        {
            continue;
        }

        result.Add(noun);
    }

    foreach (string phrase in phrases)
    {
        if (isStop(phrase))
        {
            continue;
        }

        result.Add(phrase);
    }

    // "database" is explicitly excluded; Remove does nothing when it is absent.
    // (An equivalent exclusion for "data mining" existed here but was disabled.)
    result.Remove("database");

    return result;
}