// Private methods below here -  only available within the SearchUtilities class.

        /// <summary>
        /// Scans each file once and returns the files in which every search term was found.
        /// </summary>
        /// <remarks>
        /// A term is found in a file when the stem of any word in the file equals the stem of
        /// the lower-cased term or, when <paramref name="synonymsOn"/> is set, when a word is
        /// exactly equal to the lower-cased term or to one of the synonyms returned by
        /// db.GetSynonyms for the term.
        /// With an empty <paramref name="searchTerms"/> array every file is returned, because
        /// there is no term that could be missing.
        /// The db and stemmer members are re-created on every call.
        /// </remarks>
        /// <param name="files">The files to scan</param>
        /// <param name="searchTerms">The terms that must all occur in a file</param>
        /// <param name="synonymsOn">True to also accept synonyms of each term</param>
        /// <param name="dataSet">The dataset handed to the Database that supplies the synonyms</param>
        /// <returns>The files that contain all search terms, in the order of <paramref name="files"/></returns>
        private List <string> ScanFilesByFile(List <string> files, string[] searchTerms,
                                              Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            db      = new Database(dataSet);
            stemmer = new PorterStemmer();

            // Neither the stem of a term nor its synonym list depends on the file or the word
            // being inspected, so both are resolved once per term up front instead of once per
            // word of every file.
            string[]           termStems    = new string[searchTerms.Length];
            HashSet <string>[] exactMatches = new HashSet <string> [searchTerms.Length];

            for (int i = 0; i < searchTerms.Length; i++)
            {
                string loweredTerm = searchTerms[i].ToLower();
                termStems[i] = stemmer.StemWord(loweredTerm);

                if (synonymsOn)
                {
                    exactMatches[i] = new HashSet <string> {
                        loweredTerm
                    };
                    List <string> synonyms = db.GetSynonyms(searchTerms[i]);
                    if (synonyms != null)
                    {
                        exactMatches[i].UnionWith(synonyms);
                    }
                }
            }

            List <string> fileContainsTerm = new List <string>();

            foreach (string file in files)
            {
                bool[] isInFile = new bool[searchTerms.Length]; // one found-flag per search term

                foreach (string word in ReadFromFile.GetWords(file))
                {
                    string wordStem = stemmer.StemWord(word); // stemmed once, compared with every term

                    for (int i = 0; i < searchTerms.Length; i++)
                    {
                        if (isInFile[i])
                        {
                            continue; // this term was already found in the current file
                        }

                        bool exactHit = synonymsOn && exactMatches[i].Contains(word);
                        isInFile[i] = exactHit || wordStem.Equals(termStems[i]);
                    }
                }

                if (isInFile.All(found => found)) // vacuously true when there are no terms
                {
                    fileContainsTerm.Add(file);
                }
            }
            return(fileContainsTerm);
        }
        /// <summary>
        /// Filters the collection term by term, keeping only the files that matched every
        /// term processed so far.
        /// </summary>
        /// <remarks>
        /// A file matches a term when the stem of one of its words equals the stem of the
        /// lower-cased term (the same normalisation ScanFilesByFile applies) or, when
        /// <paramref name="synonymsOn"/> is set, when a word is exactly equal to one of the
        /// synonyms returned by db.GetSynonyms for the term.
        /// With an empty <paramref name="searchTerms"/> array the <paramref name="files"/>
        /// list itself is returned.
        /// The db and stemmer members are re-created on every call.
        /// </remarks>
        /// <param name="files">The list of files</param>
        /// <param name="searchTerms">The array of search terms</param>
        /// <param name="synonymsOn">True to also accept synonyms of each term</param>
        /// <param name="dataSet">The dataset handed to the Database that supplies the synonyms</param>
        /// <returns>The files that matched every search term</returns>
        private List <string> ScanFilesByTerms(List <string> files, string[] searchTerms,
                                               Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            db      = new Database(dataSet);
            stemmer = new PorterStemmer();
            List <string> searchFileList = files;

            foreach (string term in searchTerms)
            {
                // Resolved once per term; neither value changes while the files are scanned.
                string           termStem = stemmer.StemWord(term.ToLower());
                HashSet <string> synonyms = null;

                if (synonymsOn)
                {
                    List <string> found = db.GetSynonyms(term);
                    if (found != null)
                    {
                        synonyms = new HashSet <string>(found);
                    }
                }

                List <string> fileHasTerm = new List <string>();

                foreach (string file in searchFileList)
                {
                    foreach (string word in ReadFromFile.GetWords(file))
                    {
                        bool isSynonym = synonyms != null && synonyms.Contains(word);

                        if (isSynonym || stemmer.StemWord(word).Equals(termStem))
                        {
                            fileHasTerm.Add(file);
                            break; // one matching word is enough for this file
                        }
                    }
                }

                // Only the survivors are scanned for the next term.
                searchFileList = fileHasTerm;
            }
            return(searchFileList);
        }
// Example #3
// 0
        /// <summary>
        /// Returns the frequency of search terms from the index.
        /// </summary>
        /// <remarks>
        /// Every term is stemmed and compared with the keys of internalIndex. For each key
        /// that equals a stemmed term one line is appended: the lower-cased term, ": ", the
        /// sum of all per-file values stored under that key, and "\r\n". Lines follow the
        /// enumeration order of the index keys and, within one key, the order of
        /// <paramref name="terms"/>.
        /// The stemmer member is re-created on every call.
        /// </remarks>
        /// <param name="terms">The search terms to report on</param>
        /// <returns>
        /// The report text; an empty string when nothing matches, when
        /// <paramref name="terms"/> is null or when no index has been built yet
        /// </returns>
        public string GetQueryFrequencyFromIndex(string[] terms)
        {
            string result = "";

            stemmer = new PorterStemmer();

            if (internalIndex == null || terms == null)
            {
                return(result);
            }

            // Stem each term once instead of once per index key.
            // NOTE(review): the terms are stemmed without lower-casing although the report
            // prints them lower-cased - confirm the stemmer copes with mixed-case input.
            string[] termStems = new string[terms.Length];
            for (int i = 0; i < terms.Length; i++)
            {
                termStems[i] = stemmer.StemWord(terms[i]);
            }

            foreach (string word in internalIndex.Keys)
            {
                for (int i = 0; i < terms.Length; i++)
                {
                    if (!word.Equals(termStems[i]))
                    {
                        continue;
                    }

                    double freqCount = 0;
                    foreach (var posting in internalIndex[word])
                    {
                        freqCount += posting.Value;
                    }
                    result += terms[i].ToLower() + ": " + freqCount + "\r\n";
                }
            }
            return(result);
        }
// Example #4
// 0
        /// <summary>
        /// Rebuilds the inverted index for the collection found under a folder.
        /// </summary>
        /// <remarks>
        /// The index maps each stemmed word to a dictionary of file ID to the number of times
        /// the word occurs in that file. An existing index is cleared and replaced, and
        /// indexCount is reset and then incremented once per distinct stemmed word.
        /// The method reads Application.OpenForms[0] and calls ShowIndexLength(false) on it
        /// after every file, so a form is expected to be open while indexing runs.
        /// The searchUtil and stemmer members are re-created on every call.
        /// </remarks>
        /// <param name="folder">The folder containing the collection</param>
        /// <returns>The rebuilt index, which is also stored in internalIndex</returns>
        public Dictionary <string, Dictionary <int, double> > InvertedIndex(string folder)
        {
            if (internalIndex != null)
            {
                // Empty the previous index before dropping it; anything still holding the
                // old dictionary will see it emptied.
                internalIndex.Clear();
            }
            internalIndex = new Dictionary <string, Dictionary <int, double> >();
            searchUtil    = new SearchUtilities();
            dynamic form1 = Application.OpenForms[0]; // the form that displays the index size
            indexCount    = 0;                        // number of distinct stemmed words indexed
            stemmer       = new PorterStemmer();

            foreach (string file in searchUtil.IndexingFolders(folder))
            {
                int fileID = converter.AssignId(file); // the index stores this ID instead of the path

                foreach (string word in ReadFromFile.GetWords(file))
                {
                    string stemmedWord = stemmer.StemWord(word);

                    Dictionary <int, double> postings;
                    if (!internalIndex.TryGetValue(stemmedWord, out postings))
                    {
                        // first occurrence of this stem anywhere in the collection
                        postings = new Dictionary <int, double>();
                        internalIndex.Add(stemmedWord, postings);
                        indexCount++;
                    }

                    // occurrences stays 0 when the file has no entry for this stem yet
                    double occurrences;
                    postings.TryGetValue(fileID, out occurrences);
                    postings[fileID] = occurrences + 1;
                }
                form1.ShowIndexLength(false); // refresh the running index size once per file
            }
            return(internalIndex);
        }
        /// <summary>
        /// Stems the words of a collection, leaving out words of one or two characters.
        /// </summary>
        /// <param name="folder">Passed on to GetWordCollection to obtain the words</param>
        /// <returns>The stemmed words, in the order GetWordCollection supplied them</returns>
        public string[] GetStemmedCollection(string folder)
        {
            stemmer = new PorterStemmer();

            // Filter on the raw word length first, then stem the survivors.
            return(GetWordCollection(folder)
                   .Where(candidate => candidate.Length > 2)
                   .Select(candidate => stemmer.StemWord(candidate))
                   .ToArray());
        }
// Example #6
// 0
        /// <summary>
        /// Looks up every query term in the inverted index and returns the matching files.
        /// </summary>
        /// <remarks>
        /// Each term is stemmed and resolved to a map of file path to the value stored for
        /// that file in internalIndex; a term without an index entry (or a missing index)
        /// yields an empty map. With more than one term the maps are combined from the last
        /// term towards the first, keeping only the paths present in both maps and adding
        /// their values, so the first map ends up holding the files common to all terms.
        /// The whole array of maps is then handed to ListOrderByDescending, which produces
        /// the returned list.
        /// The stemmer member is re-created on every call.
        /// </remarks>
        /// <param name="querys">The query terms</param>
        /// <returns>The list of files produced by ListOrderByDescending</returns>
        public List <string> GetFilesFromIndex(string[] querys)
        {
            stemmer = new PorterStemmer();

            Dictionary <string, double>[] lists = new Dictionary <string, double> [querys.Length];

            for (int i = 0; i < querys.Length; i++)
            {
                lists[i] = new Dictionary <string, double>();

                Dictionary <int, double> postings;
                if (internalIndex != null &&
                    internalIndex.TryGetValue(stemmer.StemWord(querys[i]), out postings))
                {
                    foreach (KeyValuePair <int, double> posting in postings)
                    {
                        // the index stores file IDs; translate them back to paths
                        lists[i].Add(converter.GetPath(posting.Key), posting.Value);
                    }
                }
            }

            // Fold the maps right to left: slot i - 1 becomes the intersection of slot i and
            // slot i - 1 with summed values, in the entry order of slot i. The loop body
            // never runs for zero or one term.
            for (int i = querys.Length - 1; i > 0; i--)
            {
                Dictionary <string, double> current  = lists[i];
                Dictionary <string, double> previous = lists[i - 1];
                Dictionary <string, double> combined = new Dictionary <string, double>();

                foreach (KeyValuePair <string, double> entry in current)
                {
                    double otherValue;
                    if (previous.TryGetValue(entry.Key, out otherValue))
                    {
                        combined.Add(entry.Key, entry.Value + otherValue);
                    }
                }
                lists[i - 1] = combined;
            }
            return(ListOrderByDescending(lists));
        }