/// <summary>
/// Copies the stemmer state held by <paramref name="other"/> into this instance.
/// </summary>
/// <param name="other">The stemmer whose state is copied.</param>
private void copy_from(EnglishStemmer other)
{
    B_Y_found = other.B_Y_found;
    I_p2 = other.I_p2;
    I_p1 = other.I_p1;

    // Delegate to the base class explicitly. An unqualified copy_from(other)
    // binds back to this very overload (the argument is an EnglishStemmer),
    // which recursed without end and overflowed the stack.
    // NOTE(review): assumes the base stemmer class exposes an accessible
    // copy_from accepting this instance, as in the generated Snowball
    // stemmers - confirm against the base class.
    base.copy_from(other);
}
/// <summary>
/// Tokenizes and stems every review text in <c>DataHandler.Reviews</c> and
/// computes term statistics for each retained word.
/// </summary>
/// <remarks>
/// Each review is split on single spaces. Tokens contained in
/// <c>DataHandler.StopWords</c> are dropped; every other token receives the
/// next sequential ID (starting at 1) and is stemmed. Tokens whose word or
/// stem is null or empty consume an ID but are not returned.
/// Reviews whose text is null are skipped and are not counted as documents.
/// </remarks>
/// <returns>
/// One <c>NLPData</c> per retained token, in corpus order, with
/// <c>Tf</c> (occurrences of the stem within its review), <c>Idf</c> and
/// <c>TfIdf</c> populated.
/// </returns>
public static List<NLPData> AnalyzeCorpus()
{
    var corpus = DataHandler.Reviews.Select(r => r.reviewText);
    var outputWords = new List<NLPData>();

    // For each stem, the number of reviews in which it occurs at least once.
    var documentFrequency = new Dictionary<string, int>();
    int documentCount = 0;
    long id = 1;

    foreach (var review in corpus)
    {
        if (review == null)
        {
            // Nothing to tokenize; previously this crashed on review.Split.
            continue;
        }

        documentCount++;

        var stemmer = new EnglishStemmer();
        var currentWords = new List<NLPData>();

        // Occurrences of each stem within this review (the term frequency).
        var stemCounts = new Dictionary<string, int>();

        // Handle initial word analysis.
        foreach (var word in review.Split(' '))
        {
            if (DataHandler.StopWords.Contains(word))
            {
                continue;
            }

            var newWord = new NLPData()
            {
                Word = word,
                ID = id,
                Stem = stemmer.Stem(word),
            };
            id++;
            currentWords.Add(newWord);

            // A null stem cannot be a dictionary key; such words are
            // filtered out of the result below anyway.
            if (newWord.Stem != null)
            {
                int seen;
                stemCounts.TryGetValue(newWord.Stem, out seen);
                stemCounts[newWord.Stem] = seen + 1;
            }
        }

        foreach (var item in currentWords)
        {
            if (string.IsNullOrEmpty(item.Stem) || string.IsNullOrEmpty(item.Word))
            {
                continue;
            }

            // Counted over every non-stop-word token of the review, exactly
            // as the former per-item scan of currentWords did.
            item.Tf = stemCounts[item.Stem];
            outputWords.Add(item);
        }

        // Each distinct stem of this review contributes one document.
        foreach (var stem in stemCounts.Keys)
        {
            int docs;
            documentFrequency.TryGetValue(stem, out docs);
            documentFrequency[stem] = docs + 1;
        }
    }

    foreach (var word in outputWords)
    {
        // NOTE(review): this is an integer division without a logarithm, and
        // TfIdf divides Tf by Idf instead of multiplying. That differs from
        // the textbook tf * log(N / df); the arithmetic is kept as it was so
        // existing consumers see the same numbers - confirm the intent.
        word.Idf = documentCount / documentFrequency[word.Stem];
        word.TfIdf = word.Tf / word.Idf;
    }

    return outputWords;
}