/// <summary>
/// Computes a score for every sentence of <c>_text</c> by summing the supplied per-word
/// values, then scales the scores by dividing each one by the highest score.
/// </summary>
/// <param name="normalizedTermFreqInverseSentFreqValues">
/// Per-word values keyed by word. Every word returned by <c>_text.GetWord</c> must be present
/// as a key; a missing word raises <see cref="KeyNotFoundException"/>.
/// </param>
/// <returns>
/// A map from sentence index to scaled score. The map is empty when the text has no sentences,
/// and holds the raw (unscaled) sums when the highest sum is exactly zero.
/// </returns>
private Dictionary <int, double> CalculateSentenceScores(Dictionary <string, double> normalizedTermFreqInverseSentFreqValues)
        {
            Dictionary <int, double> rankedSentence = new Dictionary <int, double>();

            // The sentence count does not change while scoring, so read it once.
            int sentenceCount = _text.GetSentenceCount();

            for (int i = 0; i < sentenceCount; i++)
            {
                double sentenceScore = 0;
                int wordCount = _text.GetWordCountInSentence(i);

                for (int k = 0; k < wordCount; k++)
                {
                    sentenceScore += normalizedTermFreqInverseSentFreqValues[_text.GetWord(i, k)];
                }

                rankedSentence[i] = sentenceScore;
            }

            // Max() throws InvalidOperationException on an empty sequence, so there is nothing to scale.
            if (rankedSentence.Count == 0)
            {
                return(rankedSentence);
            }

            // Normalize the sentence scores against the highest one.
            double highestSentenceScore = rankedSentence.Values.Max();

            // Dividing by an exact zero would turn every score into NaN or Infinity; keep the raw sums instead.
            if (highestSentenceScore == 0)
            {
                return(rankedSentence);
            }

            for (int i = 0; i < sentenceCount; i++)
            {
                rankedSentence[i] = rankedSentence[i] / highestSentenceScore;
            }

            return(rankedSentence);
        }
        /// <summary>
        /// Scores each sentence of the unstemmed text according to whether it contains a cue phrase.
        /// </summary>
        /// <param name="cuePhraseList">Phrases to look for; each is matched as a substring of the space-joined sentence.</param>
        /// <param name="scoreWeighting">Weight multiplied by the sentence's word count when a cue phrase is found.</param>
        /// <returns>
        /// A map from sentence index to score: <paramref name="scoreWeighting"/> times the word count
        /// when at least one cue phrase occurs in the sentence, otherwise 0.
        /// </returns>
        private Dictionary <int, double> RunCuePhraseFeature(List <string> cuePhraseList, double scoreWeighting)
        {
            var cueScores = new Dictionary<int, double>();

            int index = 0;
            while (index < _unstemmedText.GetSentenceCount())
            {
                // Rebuild the sentence as one string so multi-word phrases can be matched.
                var parts = _unstemmedText.GetSentence(index);
                string joined = string.Join(" ", parts);

                bool containsCue = cuePhraseList.Any(cue => joined.Contains(cue));

                // TODO: tweakable point to improve the algorithm.
                // The weighting is applied once per word of the sentence, no matter how many
                // cue phrases matched.
                cueScores[index] = containsCue
                    ? scoreWeighting * _unstemmedText.GetWordCountInSentence(index)
                    : 0;

                index++;
            }

            return cueScores;
        }
Пример #3
0
        /// <summary>
        /// Replaces every word in <paramref name="text"/> with the result of <c>Stemmer.StemWord</c>.
        /// </summary>
        /// <param name="text">Model whose words are overwritten in place.</param>
        /// <returns>The same <paramref name="text"/> instance that was passed in.</returns>
        public static TextModel WordStemmer(TextModel text)
        {
            var stemming = new Stemmer();

            int sentenceIndex = 0;
            while (sentenceIndex < text.GetSentenceCount())
            {
                int wordIndex = 0;
                while (wordIndex < text.GetWordCountInSentence(sentenceIndex))
                {
                    // Read the current word, stem it, and write it back to the same slot.
                    var original = text.GetWord(sentenceIndex, wordIndex);
                    text.SetWord(sentenceIndex, wordIndex, stemming.StemWord(original));
                    wordIndex++;
                }

                sentenceIndex++;
            }

            return text;
        }