示例#1
0
 /// <summary>
 /// Splits <paramref name="sentence"/> into words and passes them to the CKY routine.
 /// </summary>
 /// <param name="sentence">The sentence text to tokenize and parse.</param>
 /// <remarks>
 /// The value produced by CKY is held only in a local; this method neither returns nor stores it.
 /// </remarks>
 public static void ParseSentence(string sentence)
 {
     // Tokenize and feed the tokens straight into CKY; the resulting tree is not used further here.
     BinaryTreeNode parseTree = CKY(WordTokenizer.GetWords(sentence));
 }
示例#2
0
        /// <summary>
        /// Builds an extractive summary of <paramref name="input"/>: every sentence is scored and the
        /// highest-scoring sentences are returned in the order they appear in the text.
        /// </summary>
        /// <param name="input">The text to summarize.</param>
        /// <returns>
        /// At most five sentences taken from <paramref name="input"/>, in document order. The list is
        /// empty when the input is null/blank or when no sentence has more than four scorable words.
        /// </returns>
        /// <remarks>
        /// A word is "scorable" when it is not blank and not contained in <c>stopWords</c>.
        /// A sentence's score is the average over its scorable words of
        /// (occurrences of the word's stem in the whole text) / (1 + CommonWords.GetFrequency(word)).
        /// </remarks>
        public static List <string> GetSummary(string input)
        {
            const int MaxSummarySentences = 5;
            // Sentences with four or fewer scorable words are too short to be ranked.
            const int MinScorableWords = 5;

            var summarySentences = new List <string>();

            if (string.IsNullOrWhiteSpace(input))
            {
                return(summarySentences);
            }

            List <string> sentenceList = SentenceTokenizer.GetSentences(input);

            // Pass 1: tokenize each sentence once and count how often every stem occurs in the text.
            var sentenceWords = new List <List <string> >(sentenceList.Count);
            var stemCounts    = new Dictionary <string, int>();

            for (int i = 0; i < sentenceList.Count; i++)
            {
                List <string> wordList = WordTokenizer.GetWords(sentenceList[i]);
                sentenceWords.Add(wordList);

                foreach (string word in wordList)
                {
                    string stemmedWord = Stemmer.GetStem(word);
                    int    seen;
                    // TryGetValue leaves 'seen' at 0 for a stem not encountered before.
                    stemCounts.TryGetValue(stemmedWord, out seen);
                    stemCounts[stemmedWord] = seen + 1;
                }
            }

            // Pass 2: score each sentence as the average weight of its scorable words.
            var sentenceScores = new Dictionary <int, double>();

            for (int i = 0; i < sentenceList.Count; i++)
            {
                double totalScore = 0;
                int    numWords   = 0;

                foreach (string word in sentenceWords[i])
                {
                    if (string.IsNullOrWhiteSpace(word))
                    {
                        continue;
                    }
                    string lowered = word.ToLower();
                    if (stopWords.Contains(lowered))
                    {
                        continue;
                    }
                    // 1.0 forces floating-point division; with integer operands the weight would
                    // truncate to 0 for every word whose common-word frequency is 1 or more.
                    double weight = 1.0 / (1 + CommonWords.GetFrequency(lowered));
                    totalScore += stemCounts[Stemmer.GetStem(word)] * weight;
                    numWords   += 1;
                }
                if (numWords < MinScorableWords)
                {
                    continue;
                }
                sentenceScores[i] = totalScore / numWords;
            }

            // Keep the best-scoring sentences, then restore document order so the summary reads naturally.
            IEnumerable <int> selectedIndexes = sentenceScores
                                                .OrderByDescending(x => x.Value)
                                                .Take(MaxSummarySentences)
                                                .Select(x => x.Key)
                                                .OrderBy(index => index);

            foreach (int index in selectedIndexes)
            {
                summarySentences.Add(sentenceList[index]);
            }
            return(summarySentences);
        }