/// <summary>
/// Scores every sentence of <c>_text</c> by summing the supplied per-term values of its
/// words, then normalizes the scores by dividing each one by the highest sentence score.
/// </summary>
/// <param name="normalizedTermFreqInverseSentFreqValues">
/// Per-term values, keyed by the words returned from <c>_text.GetWord</c>.
/// </param>
/// <returns>
/// A dictionary mapping each sentence index to its normalized score. The dictionary is empty
/// when the text has no sentences, and the scores are returned undivided when the highest
/// score is zero.
/// </returns>
/// <exception cref="KeyNotFoundException">
/// A word of the text has no entry in <paramref name="normalizedTermFreqInverseSentFreqValues"/>.
/// </exception>
private Dictionary<int, double> CalculateSentenceScores(Dictionary<string, double> normalizedTermFreqInverseSentFreqValues)
{
    // The sentence count is loop-invariant, so read it once.
    var sentenceCount = _text.GetSentenceCount();
    var rankedSentence = new Dictionary<int, double>();

    for (int i = 0; i < sentenceCount; i++)
    {
        double sentenceScore = 0;
        var wordCount = _text.GetWordCountInSentence(i);
        for (int k = 0; k < wordCount; k++)
        {
            sentenceScore += normalizedTermFreqInverseSentFreqValues[_text.GetWord(i, k)];
        }

        rankedSentence[i] = sentenceScore;
    }

    // Max() throws on an empty sequence, so there is nothing to normalize without sentences.
    if (rankedSentence.Count == 0)
    {
        return rankedSentence;
    }

    // Normalize the sentence scores against the highest one.
    double highestSentenceScore = rankedSentence.Values.Max();

    // Dividing by an exact zero would turn every score into NaN (0 / 0);
    // keep the raw scores in that case.
    if (highestSentenceScore == 0)
    {
        return rankedSentence;
    }

    for (int i = 0; i < sentenceCount; i++)
    {
        rankedSentence[i] = rankedSentence[i] / highestSentenceScore;
    }

    return rankedSentence;
}
/// <summary>
/// Cue phrase feature: gives a score to every sentence of <c>_unstemmedText</c> that
/// contains at least one of the supplied cue phrases.
/// </summary>
/// <param name="cuePhraseList">Phrases to look for inside each space-joined sentence.</param>
/// <param name="scoreWeighting">Weight that is multiplied by the sentence's word count.</param>
/// <returns>
/// A dictionary mapping each sentence index to <paramref name="scoreWeighting"/> times the
/// sentence's word count when a cue phrase is present, or to 0 otherwise.
/// </returns>
private Dictionary<int, double> RunCuePhraseFeature(List<string> cuePhraseList, double scoreWeighting)
{
    var scoresBySentence = new Dictionary<int, double>();

    for (int index = 0; index < _unstemmedText.GetSentenceCount(); index++)
    {
        // Rebuild the sentence as a single string so multi-word phrases can match.
        var words = _unstemmedText.GetSentence(index);
        string joined = string.Join(" ", words);

        bool hasCuePhrase = cuePhraseList.Any(cue => joined.Contains(cue));

        // Tweakable point: the weighting is applied once per word of the sentence,
        // regardless of how many different cue phrases the sentence contains.
        scoresBySentence[index] = hasCuePhrase
            ? scoreWeighting * _unstemmedText.GetWordCountInSentence(index)
            : 0;
    }

    return scoresBySentence;
}
/// <summary>
/// Replaces every word of the given text model with its stemmed form.
/// </summary>
/// <param name="text">The model whose words are stemmed; it is modified in place.</param>
/// <returns>The same <paramref name="text"/> instance, after its words have been replaced.</returns>
public static TextModel WordStemmer(TextModel text)
{
    var wordStemmer = new Stemmer();

    for (int sentenceIndex = 0; sentenceIndex < text.GetSentenceCount(); sentenceIndex++)
    {
        for (int wordIndex = 0; wordIndex < text.GetWordCountInSentence(sentenceIndex); wordIndex++)
        {
            var original = text.GetWord(sentenceIndex, wordIndex);
            text.SetWord(sentenceIndex, wordIndex, wordStemmer.StemWord(original));
        }
    }

    return text;
}