/// <summary>
        /// Count how many entries of an array match a word, either exactly or by
        /// sharing the same English stem (stems are compared ignoring case).
        /// </summary>
        /// <param name="word">The word to count.</param>
        /// <param name="words">A non-null array of words. It does not need to be sorted; null entries are skipped.</param>
        /// <returns>The number of entries that equal <paramref name="word"/> or stem to the same root.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public static int CountWords(string word, string[] words)
        {
            if (words == null)
            {
                throw new ArgumentNullException("words");
            }

            if (words.Length == 0)
            {
                return 0;
            }

            // The stem of the search word never changes, so compute it once
            // instead of once per array element.
            string targetStem = new EnglishStemmer.EnglishWord(word).Stem;

            int count = 0;
            foreach (string candidate in words)
            {
                if (candidate == null)
                {
                    continue;
                }

                // An exact match needs no stemming at all.
                if (candidate == word)
                {
                    count++;
                    continue;
                }

                string candidateStem = new EnglishStemmer.EnglishWord(candidate).Stem;

                // Ordinal, case-insensitive comparison avoids culture-dependent
                // lowercasing (e.g. the Turkish dotless i).
                if (string.Equals(candidateStem, targetStem, StringComparison.OrdinalIgnoreCase))
                {
                    count++;
                }
            }

            return count;
        }
Example #2
0
 /// <summary>
 /// Return the stem of <paramref name="word"/>, looking it up in
 /// <c>_wordStems</c> before falling back to the English stemmer.
 /// </summary>
 /// <param name="word">The word whose stem is requested.</param>
 /// <returns>The stored stem when present, otherwise a freshly computed one.</returns>
 string InternalGetWordStem(string word)
 {
     string storedStem;
     bool isStored = _wordStems.TryGetValue(word, out storedStem);

     if (!isStored)
     {
         // Miss: compute the stem and hand it to InsertWord before returning it.
         string computedStem = new EnglishStemmer.EnglishWord(word).Stem;
         InsertWord(word, computedStem);
         return computedStem;
     }

     // Hit: notify BringToFront about the access, then return the stored stem.
     BringToFront(word);
     return storedStem;
 }
Example #3
0
        /// <summary>
        /// Resolve the stem for <paramref name="word"/>: use the entry in
        /// <c>_wordStems</c> when there is one, otherwise stem the word and
        /// record the result through <c>InsertWord</c>.
        /// </summary>
        /// <param name="word">The word to stem.</param>
        /// <returns>The stem associated with <paramref name="word"/>.</returns>
        string InternalGetWordStem(string word)
        {
            string result;
            bool found = _wordStems.TryGetValue(word, out result);

            if (found)
            {
                // Known word: BringToFront is called on every successful lookup.
                BringToFront(word);
            }
            else
            {
                // Unknown word: compute the stem and register it.
                result = new EnglishStemmer.EnglishWord(word).Stem;
                InsertWord(word, result);
            }

            return result;
        }
Example #4
0
        /// <summary>
        /// Index every comment of a comments payload: register the comment text,
        /// update the word dictionaries with the stems of its words, and append one
        /// posting per distinct stem found in the comment.
        /// </summary>
        /// <param name="commentsStruct">
        /// Dynamic payload whose <c>data</c> member enumerates comments exposing
        /// <c>id</c>, <c>message</c> and <c>from.id</c>. Nothing happens when
        /// <c>data</c> is null.
        /// </param>
        /// <param name="story_id">Identifier stored on each registered comment and on each posting.</param>
        public void ParseComments(dynamic commentsStruct, string story_id)
        {
            if (commentsStruct.data == null)
            {
                return;
            }

            foreach (dynamic comment in commentsStruct.data)
            {
                string toParse = comment.message;
                if (toParse == null)
                {
                    // A comment without text has nothing to index; skipping it keeps
                    // one bad entry from aborting the whole batch.
                    continue;
                }

                string commentId = comment.id.ToString();

                // Ids already known as stories are neither registered nor indexed.
                if (referenceStoryDictionary.ContainsKey(commentId))
                {
                    continue;
                }

                if (!referenceCommentDictionary.ContainsKey(commentId))
                {
                    // NOTE(review): the stored text is escaped by hand (quotes,
                    // backslashes, commas), which suggests it is later spliced into
                    // SQL/CSV elsewhere; prefer parameterized writes there.
                    referenceCommentDictionary.Add(commentId, new CommentStruct() { comment = toParse.Replace("\'", "\'\'").Replace("\\", "\\\\").Replace(",", "\\,"), storyID = story_id, owner = comment.from.id });
                }

                // Normalize: drop apostrophes, turn punctuation into spaces,
                // collapse whitespace runs to a single space, then delete digits.
                string refined = toParse.Replace("\'", "");
                refined = Regex.Replace(refined, @"[^\w\s]", " ");
                refined = Regex.Replace(refined, @"\s+", " ");
                refined = Regex.Replace(refined, @"\d", "");

                // Deleting digits can leave consecutive spaces behind, so empty
                // entries are removed to avoid indexing empty "words".
                string[] tokens = refined.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                Dictionary<string, int> documentfrequency = new Dictionary<string, int>();
                foreach (string token in tokens)
                {
                    string expansion = new EnglishStemmer.EnglishWord(token.ToLower()).Stem;
                    if (expansion == null)
                    {
                        continue;
                    }

                    // "seen" is evaluated per word: has this stem already occurred
                    // in this comment? (occurrences is 0 when it has not.)
                    int occurrences;
                    bool seen = documentfrequency.TryGetValue(expansion, out occurrences);

                    if (!ReferenceDictionary.ContainsKey(expansion))
                    {
                        // Brand-new stem: register it in both dictionaries under a fresh id.
                        ReferenceDictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                        Dictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                        word_id++;
                    }
                    else if (!seen)
                    {
                        // Known stem, first occurrence in this comment.
                        DictionaryStruct temp = ReferenceDictionary[expansion];
                        if (!Dictionary.ContainsKey(expansion))
                        {
                            Dictionary.Add(expansion, new DictionaryStruct() { frequency = 1, word_id = word_id });
                            word_id++;
                        }
                        else
                        {
                            Dictionary[expansion] = new DictionaryStruct() { frequency = temp.frequency + 1, word_id = Dictionary[expansion].word_id };
                        }
                    }

                    documentfrequency[expansion] = occurrences + 1;
                }

                // One posting per distinct stem, carrying its count within this comment.
                foreach (KeyValuePair<string, int> kvp in documentfrequency)
                {
                    postings.Add(new KeyValuePair<int, WordStructure>(ReferenceDictionary[kvp.Key].word_id, new WordStructure(story_id, ReferenceDictionary[kvp.Key].word_id, kvp.Value)));
                }
            }
        }
Example #5
0
        /// <summary>
        /// Index a story: register it (when not already known), update the word
        /// dictionaries with the stems of its non-stop-words, and append one
        /// posting per distinct stem.
        /// </summary>
        /// <param name="toParse">Raw story text.</param>
        /// <param name="story_id">Identifier of the story; used as dictionary key and stored on each posting.</param>
        /// <param name="owner">Owner value stored with the story.</param>
        /// <param name="likes">Like count as text; converted with <c>Convert.ToInt32</c> when the story is registered.</param>
        public void Parse(string toParse, string story_id, string owner, string likes)
        {
            // Normalize: drop apostrophes, turn punctuation into spaces,
            // collapse whitespace runs to a single space, then delete digits.
            string refined = toParse.Replace("\'", "");
            refined = Regex.Replace(refined, @"[^\w\s]", " ");
            refined = Regex.Replace(refined, @"\s+", " ");
            refined = Regex.Replace(refined, @"\d", "");

            if (!referenceStoryDictionary.ContainsKey(story_id) && !storyDictionary.ContainsKey(story_id))
            {
                referenceStoryDictionary.Add(story_id, new StoryStruct() { story = toParse.Replace("\'", "\'\'"), owner = owner, likes = Convert.ToInt32(likes) });
                storyDictionary.Add(story_id, new StoryStruct() { story = toParse.Replace("\'", "\'\'"), owner = owner, likes = Convert.ToInt32(likes) });
            }

            // Deleting digits can leave consecutive spaces behind, so empty
            // entries are removed to avoid indexing empty "words".
            string[] tokens = refined.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            Dictionary<string, int> documentfrequency = new Dictionary<string, int>();
            foreach (string token in tokens)
            {
                string expansion = new EnglishStemmer.EnglishWord(token.ToLower()).Stem;
                if (expansion == null)
                {
                    continue;
                }

                // Stop words are ignored entirely; the next token starts clean.
                if (stopwords.Contains<string>(expansion))
                {
                    continue;
                }

                // "seen" is evaluated per word: has this stem already occurred
                // in this story? (occurrences is 0 when it has not.)
                int occurrences;
                bool seen = documentfrequency.TryGetValue(expansion, out occurrences);

                if (!ReferenceDictionary.ContainsKey(expansion))
                {
                    // Brand-new stem: register it in both dictionaries under a fresh id.
                    ReferenceDictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                    Dictionary.Add(expansion, new DictionaryStruct() { word_id = word_id, frequency = 1 });
                    word_id++;
                }
                else if (!seen)
                {
                    // Known stem, first occurrence in this story.
                    DictionaryStruct temp = ReferenceDictionary[expansion];
                    if (!Dictionary.ContainsKey(expansion))
                    {
                        Dictionary.Add(expansion, new DictionaryStruct() { frequency = 1, word_id = word_id });
                        word_id++;
                    }
                    else
                    {
                        Dictionary[expansion] = new DictionaryStruct() { frequency = temp.frequency + 1, word_id = Dictionary[expansion].word_id };
                    }
                }

                documentfrequency[expansion] = occurrences + 1;
            }

            // One posting per distinct stem, carrying its count within this story.
            foreach (KeyValuePair<string, int> kvp in documentfrequency)
            {
                postings.Add(new KeyValuePair<int, WordStructure>(ReferenceDictionary[kvp.Key].word_id, new WordStructure(story_id, ReferenceDictionary[kvp.Key].word_id, kvp.Value)));
            }
        }