示例#1
0
        /// <summary>
        /// Adds to <paramref name="result"/> every word of <paramref name="input"/> whose
        /// part-of-speech tag is "NN" or "NNS".
        /// </summary>
        /// <remarks>
        /// The tags in <paramref name="pos"/> are numbered consecutively, row after row. That
        /// running number is used as the index into the word list obtained by passing
        /// <paramref name="input"/> through LanguageAnalyze.RemovePunctuation and splitting it on
        /// whitespace. Indexes that fall beyond the end of the word list are ignored.
        /// </remarks>
        /// <param name="input">Text the words are taken from.</param>
        /// <param name="pos">Jagged array of tags; rows may have different lengths.</param>
        /// <param name="result">Set that receives the matching words.</param>
        public static void getNN_NNs(string input, string[][] pos, HashSet <string> result)
        {
            List <int> nounIndexes = new List <int>();
            int        flatIndex   = 0;

            for (int row = 0; row < pos.Length; row++)
            {
                // Bound by this row's own length: pos is jagged, so a row is not guaranteed
                // to be as long as pos[0].
                for (int col = 0; col < pos[row].Length; col++)
                {
                    // '==' on strings is an ordinal comparison and treats a null tag as a
                    // non-match instead of throwing.
                    string tag = pos[row][col];
                    if (tag == "NN" || tag == "NNS")
                    {
                        nounIndexes.Add(flatIndex);
                    }
                    flatIndex++;
                }
            }

            input = LanguageAnalyze.RemovePunctuation(input);
            string[] words = input.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            foreach (int wordIndex in nounIndexes)
            {
                // There may be more tags than words; skip indexes with no matching word.
                if (wordIndex < words.Length)
                {
                    result.Add(words[wordIndex]);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Builds the set of noun phrases and standalone nouns found in <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// Phrases are gathered by running Extract over each string returned by
        /// LanguageAnalyze.analyzeDependancyTree. Standalone nouns come from getNN_NNs applied to
        /// the output of LanguageAnalyze.analyzePOSTag. Any entry for which isStop returns true is
        /// left out, a noun is also left out when containWord returns true for it against the
        /// phrase set, and the literal "database" is never part of the returned set.
        /// </remarks>
        /// <param name="input">Text to analyze.</param>
        /// <returns>A new set holding the retained nouns and phrases.</returns>
        public static HashSet <string> getNP(string input)
        {
            HashSet <string> phrases = new HashSet <string>();

            foreach (string tree in LanguageAnalyze.analyzeDependancyTree(input))
            {
                Extract(tree, phrases);
            }

            Tuple <string, string[][]> tagged = LanguageAnalyze.analyzePOSTag(input);
            HashSet <string>           nouns  = new HashSet <string>();

            getNN_NNs(tagged.Item1, tagged.Item2, nouns);

            HashSet <string> result = new HashSet <string>();

            // Standalone nouns first: skip those containWord matches against the phrases,
            // and stop words.
            foreach (string noun in nouns)
            {
                if (containWord(phrases, noun) || isStop(noun))
                {
                    continue;
                }
                result.Add(noun);
            }

            // Then every phrase that is not a stop word.
            foreach (string phrase in phrases)
            {
                if (isStop(phrase))
                {
                    continue;
                }
                result.Add(phrase);
            }

            // Remove simply returns false when the item is absent, so no Contains check is needed.
            result.Remove("database");
            return result;
        }