/// <summary>
/// Count how many entries of an array of words match a given word, either
/// exactly or by sharing the same stem (as produced by
/// <c>EnglishStemmer.EnglishWord</c>). Stems are compared case-insensitively.
/// </summary>
/// <param name="word">The word to count.</param>
/// <param name="words">A non-null array of words. It does not need to be sorted; null entries are skipped.</param>
/// <returns>The number of entries in <paramref name="words"/> that match <paramref name="word"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
public static int CountWords(string word, string[] words)
{
    if (words == null)
    {
        throw new ArgumentNullException("words");
    }

    if (words.Length == 0)
    {
        return 0;
    }

    // The stem of the target word is loop-invariant, so compute it once
    // instead of re-stemming it for every entry.
    string targetStem = new EnglishStemmer.EnglishWord(word).Stem;

    int count = 0;
    foreach (string candidate in words)
    {
        if (candidate == null)
        {
            continue;
        }

        // An exact match counts without needing to stem the candidate.
        if (candidate == word)
        {
            count++;
            continue;
        }

        string candidateStem = new EnglishStemmer.EnglishWord(candidate).Stem;

        // Ordinal, case-insensitive comparison keeps the result independent
        // of the current thread culture.
        if (string.Equals(candidateStem, targetStem, StringComparison.OrdinalIgnoreCase))
        {
            count++;
        }
    }

    return count;
}
/// <summary>
/// Returns the stem of <paramref name="word"/>. If the word is already
/// present in <c>_wordStems</c>, <c>BringToFront</c> is called for it and the
/// stored stem is returned. Otherwise the stem is computed with
/// <c>EnglishStemmer.EnglishWord</c>, passed to <c>InsertWord</c>, and returned.
/// </summary>
/// <param name="word">The word whose stem is requested.</param>
/// <returns>The stored or freshly computed stem.</returns>
string InternalGetWordStem(string word)
{
    string knownStem;
    if (!_wordStems.TryGetValue(word, out knownStem))
    {
        // Miss: compute the stem and record it for later lookups.
        string freshStem = new EnglishStemmer.EnglishWord(word).Stem;
        InsertWord(word, freshStem);
        return freshStem;
    }

    // Hit: presumably refreshes the entry's recency before reuse — confirm
    // against BringToFront.
    BringToFront(word);
    return knownStem;
}
/// <summary>
/// Looks up the stem of <paramref name="word"/> in <c>_wordStems</c>. On a
/// hit, <c>BringToFront</c> is invoked for the word; on a miss, the stem is
/// produced by <c>EnglishStemmer.EnglishWord</c> and handed to
/// <c>InsertWord</c>. The stem is returned in both cases.
/// </summary>
/// <param name="word">The word to stem.</param>
/// <returns>The stem associated with <paramref name="word"/>.</returns>
string InternalGetWordStem(string word)
{
    string result;
    bool alreadyStored = _wordStems.TryGetValue(word, out result);

    if (alreadyStored)
    {
        BringToFront(word);
    }
    else
    {
        result = new EnglishStemmer.EnglishWord(word).Stem;
        InsertWord(word, result);
    }

    return result;
}
/// <summary>
/// Indexes every comment found in <c>commentsStruct.data</c>. Each comment
/// whose id is not already a key of <c>referenceStoryDictionary</c> is
/// recorded in <c>referenceCommentDictionary</c> (if not yet present), its
/// message is normalised, split into words and stemmed, the word
/// dictionaries are updated, and one posting per distinct stem is appended
/// to <c>postings</c> under <paramref name="story_id"/>.
/// </summary>
/// <param name="commentsStruct">Dynamic object exposing a <c>data</c> collection of comments; each comment is read for <c>id</c>, <c>message</c> and <c>from.id</c>.</param>
/// <param name="story_id">Id of the story the comments belong to; stored on each comment record and on each posting.</param>
public void ParseComments(dynamic commentsStruct, string story_id)
{
    if (commentsStruct.data == null)
    {
        return;
    }

    foreach (dynamic comment in commentsStruct.data)
    {
        string commentId = comment.id.ToString();

        // Ids already recorded as stories are never indexed as comments.
        if (referenceStoryDictionary.ContainsKey(commentId))
        {
            continue;
        }

        string toParse = comment.message;
        if (toParse == null)
        {
            // A comment without a message has nothing to index; skip it
            // instead of failing on a null reference.
            continue;
        }

        // Normalise: drop apostrophes, turn punctuation into spaces,
        // collapse whitespace runs, then strip digits.
        string refined = toParse.Replace("\'", "");
        refined = Regex.Replace(refined, @"[^\w\s]", " ");
        refined = Regex.Replace(refined, @"\s+", " ");
        refined = Regex.Replace(refined, @"\d", "");

        if (!referenceCommentDictionary.ContainsKey(commentId))
        {
            // NOTE(review): the manual quote/backslash/comma escaping suggests this
            // text is later embedded in SQL or a delimited file — confirm, and
            // prefer parameterised writes where possible.
            referenceCommentDictionary.Add(commentId, new CommentStruct()
            {
                comment = toParse.Replace("\'", "\'\'").Replace("\\", "\\\\").Replace(",", "\\,"),
                storyID = story_id,
                owner = comment.from.id
            });
        }

        // Occurrences of each stem inside this comment.
        Dictionary<string, int> documentfrequency = new Dictionary<string, int>();

        // Removing digits after collapsing whitespace can leave leading or doubled
        // spaces; dropping empty entries keeps them from becoming empty "words".
        string[] tokens = refined.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        foreach (string token in tokens)
        {
            string expansion = new EnglishStemmer.EnglishWord(token.ToLower()).Stem;
            if (expansion == null)
            {
                continue;
            }

            // Evaluated per word: only the first occurrence of a stem within this
            // comment may touch the dictionary frequency.
            bool seenInComment = documentfrequency.ContainsKey(expansion);

            if (!ReferenceDictionary.ContainsKey(expansion))
            {
                // Brand-new word: register it in both dictionaries with a fresh id.
                ReferenceDictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                Dictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                word_id++;
            }
            else if (!seenInComment)
            {
                DictionaryStruct temp = ReferenceDictionary[expansion];
                if (!Dictionary.ContainsKey(expansion))
                {
                    // NOTE(review): this assigns a fresh word_id rather than
                    // temp.word_id, while the postings below use the id stored in
                    // ReferenceDictionary — confirm this is intended.
                    Dictionary.Add(expansion, new DictionaryStruct() { frequency = 1, word_id = word_id });
                    word_id++;
                }
                else
                {
                    Dictionary[expansion] = new DictionaryStruct()
                    {
                        frequency = temp.frequency + 1,
                        word_id = Dictionary[expansion].word_id
                    };
                }
            }

            if (seenInComment)
            {
                documentfrequency[expansion]++;
            }
            else
            {
                documentfrequency.Add(expansion, 1);
            }
        }

        // One posting per distinct stem, carrying its in-comment count.
        foreach (KeyValuePair<string, int> kvp in documentfrequency)
        {
            postings.Add(new KeyValuePair<int, WordStructure>(
                ReferenceDictionary[kvp.Key].word_id,
                new WordStructure(story_id, ReferenceDictionary[kvp.Key].word_id, kvp.Value)));
        }
    }
}
/// <summary>
/// Indexes the text of one story. The story is recorded in
/// <c>referenceStoryDictionary</c> and <c>storyDictionary</c> when neither
/// contains <paramref name="story_id"/> yet; the text is then normalised,
/// split into words and stemmed, stop words are skipped, the word
/// dictionaries are updated, and one posting per distinct stem is appended
/// to <c>postings</c>.
/// </summary>
/// <param name="toParse">Raw story text.</param>
/// <param name="story_id">Id of the story; used as dictionary key and on each posting.</param>
/// <param name="owner">Owner value stored on the story record.</param>
/// <param name="likes">Like count as text; converted with <c>Convert.ToInt32</c> when the story is recorded.</param>
public void Parse(string toParse, string story_id, string owner, string likes)
{
    // Normalise: drop apostrophes, turn punctuation into spaces,
    // collapse whitespace runs, then strip digits.
    string refined = toParse.Replace("\'", "");
    refined = Regex.Replace(refined, @"[^\w\s]", " ");
    refined = Regex.Replace(refined, @"\s+", " ");
    refined = Regex.Replace(refined, @"\d", "");

    if (!referenceStoryDictionary.ContainsKey(story_id) && !storyDictionary.ContainsKey(story_id))
    {
        // Computed once and reused for both records.
        string escapedStory = toParse.Replace("\'", "\'\'");
        int likeCount = Convert.ToInt32(likes);

        referenceStoryDictionary.Add(story_id, new StoryStruct() { story = escapedStory, owner = owner, likes = likeCount });
        storyDictionary.Add(story_id, new StoryStruct() { story = escapedStory, owner = owner, likes = likeCount });
    }

    // Occurrences of each stem inside this story.
    Dictionary<string, int> documentfrequency = new Dictionary<string, int>();

    // Removing digits after collapsing whitespace can leave leading or doubled
    // spaces; dropping empty entries keeps them from becoming empty "words".
    string[] tokens = refined.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (string token in tokens)
    {
        string expansion = new EnglishStemmer.EnglishWord(token.ToLower()).Stem;
        if (expansion == null)
        {
            continue;
        }

        // Stop words are dropped whole; the next token starts cleanly.
        if (stopwords.Contains<string>(expansion))
        {
            continue;
        }

        // Evaluated per word: only the first occurrence of a stem within this
        // story may touch the dictionary frequency.
        bool seenInStory = documentfrequency.ContainsKey(expansion);

        if (!ReferenceDictionary.ContainsKey(expansion))
        {
            // Brand-new word: register it in both dictionaries with a fresh id.
            ReferenceDictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
            Dictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
            word_id++;
        }
        else if (!seenInStory)
        {
            DictionaryStruct temp = ReferenceDictionary[expansion];
            if (!Dictionary.ContainsKey(expansion))
            {
                // NOTE(review): this assigns a fresh word_id rather than
                // temp.word_id, while the postings below use the id stored in
                // ReferenceDictionary — confirm this is intended.
                Dictionary.Add(expansion, new DictionaryStruct() { frequency = 1, word_id = word_id });
                word_id++;
            }
            else
            {
                Dictionary[expansion] = new DictionaryStruct()
                {
                    frequency = temp.frequency + 1,
                    word_id = Dictionary[expansion].word_id
                };
            }
        }

        if (seenInStory)
        {
            documentfrequency[expansion]++;
        }
        else
        {
            documentfrequency.Add(expansion, 1);
        }
    }

    // One posting per distinct stem, carrying its in-story count.
    foreach (KeyValuePair<string, int> kvp in documentfrequency)
    {
        postings.Add(new KeyValuePair<int, WordStructure>(
            ReferenceDictionary[kvp.Key].word_id,
            new WordStructure(story_id, ReferenceDictionary[kvp.Key].word_id, kvp.Value)));
    }
}