// Private methods below here -  only available within the SearchUtilities class.

        /*
         * Scans files and searches for terms/synonyms in each file.
         * Marks files true if all terms are found in file
         * Returns the list of files that were marked true
         */
        private List <string> ScanFilesByFile(List <string> files, string[] searchTerms,
                                              Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            // Purpose: return every file in which ALL search terms occur. A term counts as
            // present when a word of the file has the same stem as the lower-cased term or,
            // with synonymsOn, when a word equals the lower-cased term or one of the
            // synonyms the database returns for it.
            //
            //   files       - the files to scan (each is read once via ReadFromFile.GetWords)
            //   searchTerms - the terms that must all be present
            //   synonymsOn  - true to also accept synonyms from the database
            //   dataSet     - the data set the synonym database is built on
            //   returns     - the subset of files that contain every term, in input order
            db      = new Database(dataSet);
            stemmer = new PorterStemmer();

            int termCount = searchTerms.Length;

            // Everything that depends only on the term is computed once up front instead of
            // once per word of every file (the synonym lookup in particular).
            // NOTE(review): assumes GetSynonyms and StemWord return the same result for the
            // same input during one scan - confirm against their implementations.
            string[]          stemmedTerms = new string[termCount];
            HashSet<string>[] exactForms   = new HashSet<string>[termCount];

            for (int t = 0; t < termCount; t++)
            {
                string loweredTerm = searchTerms[t].ToLower();
                stemmedTerms[t] = stemmer.StemWord(loweredTerm);

                if (synonymsOn)
                {
                    // Exact spellings accepted for this term: the term itself plus its synonyms.
                    HashSet<string> forms    = new HashSet<string> { loweredTerm };
                    List<string>    synonyms = db.GetSynonyms(searchTerms[t]);
                    if (synonyms != null)
                    {
                        forms.UnionWith(synonyms);
                    }
                    exactForms[t] = forms;
                }
            }

            List<string> matchingFiles = new List<string>();

            foreach (string file in files)
            {
                bool[] found     = new bool[termCount]; // found[t] is true once term t was seen
                int    remaining = termCount;           // number of terms not yet seen in this file

                foreach (string word in ReadFromFile.GetWords(file))
                {
                    if (remaining == 0)
                    {
                        break; // every term already seen; the rest of the file cannot change the result
                    }

                    string stemmedWord = stemmer.StemWord(word); // stem once per word, not once per term

                    for (int t = 0; t < termCount; t++)
                    {
                        if (found[t])
                        {
                            continue;
                        }

                        bool isMatch = stemmedWord.Equals(stemmedTerms[t]) ||
                                       (synonymsOn && exactForms[t].Contains(word));
                        if (isMatch)
                        {
                            found[t] = true;
                            remaining--;
                        }
                    }
                }

                if (remaining == 0) // also true when there are no search terms at all
                {
                    matchingFiles.Add(file);
                }
            }

            return matchingFiles;
        }
        /// <summary>
        /// Scans the files term by term: after each search term, only the files that
        /// contain that term (by stem, or by synonym when enabled) are kept for the next term.
        /// </summary>
        /// <param name="files">The list of files</param>
        /// <param name="searchTerms">The array of search terms</param>
        /// <param name="synonymsOn">True to also match synonyms of each term</param>
        /// <param name="dataSet">The dataset to retrieve synonyms from</param>
        /// <returns>The files that contain every search term</returns>
        private List <string> ScanFilesByTerms(List <string> files, string[] searchTerms,
                                               Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            // Narrows the candidate files one search term at a time: after each term only the
            // files containing that term survive, so later terms re-read fewer files.
            // A file contains a term when one of its words has the same stem as the
            // lower-cased term or, with synonymsOn, equals one of the term's synonyms.
            db      = new Database(dataSet);
            stemmer = new PorterStemmer();

            List<string> candidates = files;

            foreach (string term in searchTerms)
            {
                // Term-only work is done once per term rather than once per word.
                // The term is lower-cased before stemming so that this strategy agrees with
                // ScanFilesByFile, which does the same.
                // NOTE(review): assumes GetSynonyms/StemWord give stable results for the same
                // input during one scan - confirm against their implementations.
                string       stemmedTerm = stemmer.StemWord(term.ToLower());
                List<string> synonyms    = synonymsOn ? db.GetSynonyms(term) : null;

                List<string> survivors = new List<string>();

                foreach (string file in candidates)
                {
                    foreach (string word in ReadFromFile.GetWords(file))
                    {
                        bool isMatch = stemmer.StemWord(word).Equals(stemmedTerm) ||
                                       (synonyms != null && synonyms.Contains(word));
                        if (isMatch)
                        {
                            survivors.Add(file);
                            break; // one hit is enough; skip the rest of this file
                        }
                    }
                }

                candidates = survivors;

                if (candidates.Count == 0)
                {
                    break; // no file can satisfy the remaining terms
                }
            }

            return candidates;
        }
        // ---- Example #3 ----
        /// <summary>
        /// Gets the files from this instance's inverted index that contain the queries
        /// or their synonyms.
        /// </summary>
        /// <param name="querys">The array of queries</param>
        /// <param name="dataSet">The dataset to draw synonyms from</param>
        /// <returns>The file list produced by ListOrderByDescending from the per-query matches</returns>
        public List <string> GetFilesFromIndexWithSynonyms(string[] querys, NewWordsDataSet dataSet)
        {
            stemmer = new PorterStemmer();

            Database database = new Database(dataSet);

            // lists[q] maps file path -> index value for every file that matches query q,
            // either directly or through one of the query's synonyms.
            Dictionary<string, double>[] lists = new Dictionary<string, double>[querys.Length];

            for (int q = 0; q < querys.Length; q++)
            {
                string query = querys[q];

                // The query itself is looked up first, then each synonym, so a file found
                // for the query keeps the query's value and later hits never overwrite it.
                List<string> lookupTerms = new List<string> { query };
                List<string> synonyms    = database.GetSynonyms(query);
                if (synonyms != null)
                {
                    lookupTerms.AddRange(synonyms);
                }

                Dictionary<string, double> matches = new Dictionary<string, double>();

                foreach (string lookupTerm in lookupTerms)
                {
                    string stemmedTerm = stemmer.StemWord(lookupTerm);
                    if (!internalIndex.ContainsKey(stemmedTerm))
                    {
                        continue;
                    }

                    foreach (var posting in internalIndex[stemmedTerm])
                    {
                        string path = converter.GetPath(posting.Key);

                        // First value recorded for a path wins; the guard also keeps a
                        // repeated path from throwing in Dictionary.Add.
                        if (!matches.ContainsKey(path))
                        {
                            matches.Add(path, posting.Value);
                        }
                    }
                }

                lists[q] = matches;
            }

            // Fold the per-query results from the last query towards the first: each step
            // keeps only the paths present in both dictionaries and sums their values, so
            // lists[0] ends up holding the files common to all queries.
            for (int i = querys.Length - 1; i > 0; i--)
            {
                Dictionary<string, double> current  = lists[i];
                Dictionary<string, double> previous = lists[i - 1];
                Dictionary<string, double> combined = new Dictionary<string, double>();

                foreach (KeyValuePair<string, double> entry in current)
                {
                    double previousValue;
                    if (previous.TryGetValue(entry.Key, out previousValue))
                    {
                        combined.Add(entry.Key, entry.Value + previousValue);
                    }
                }

                lists[i - 1] = combined;
            }

            return ListOrderByDescending(lists);
        }
        // ---- Example #4 ----
        /// <summary>
        /// The data set handed to the constructor, kept for the class methods to use.
        /// </summary>
        private NewWordsDataSet nwDataSet;

        /// <summary>
        /// Creates a Database that works on the given data set.
        /// </summary>
        /// <param name="dataSet">The NewWordsDataSet this instance keeps a reference to</param>
        public Database(NewWordsDataSet dataSet)
        {
            // Only the reference is stored; the data set is not copied here.
            nwDataSet = dataSet;
        }
        /*
         * Returns the list of files inside the folder that contain the search terms
         * (or their synonyms, if synonym matching is switched on).
         * Works by iterating over the files in the folder.
         */
        public List <string> GetFilesContainingTermsByFiles(string folder, string[] terms, Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            // Collect the folder's files through IndexingFolders and pass them straight
            // to the file-by-file scanner.
            return ScanFilesByFile(IndexingFolders(folder), terms, synonymsOn, dataSet);
        }