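/// <summary>
        /// Narrows a list of files down to those that contain every search term.
        /// A file contains a term when one of its words has the same stem as the term or,
        /// if synonym checking is on, equals one of the term's synonyms.
        /// </summary>
        /// <param name="files">The list of files to scan</param>
        /// <param name="searchTerms">The array of search terms</param>
        /// <param name="synonymsOn">Whether words are also compared against each term's synonyms</param>
        /// <param name="dataSet">The dataset to retrieve synonyms from</param>
        /// <returns>The files that contain all of the search terms</returns>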
        private List <string> ScanFilesByTerms(List <string> files, string[] searchTerms,
                                               Boolean synonymsOn, NewWordsDataSet dataSet)
        {
            // db and stemmer are members of the enclosing type (not declared here);
            // they are re-created on every call.
            db      = new Database(dataSet);
            stemmer = new PorterStemmer();

            // Each term narrows the candidate list further, so the final list holds
            // only the files that matched every term.
            List <string> searchFileList = files;

            for (int i = 0; i < searchTerms.Length; i++)
            {
                // No candidates left: the remaining terms cannot add files back.
                if (searchFileList.Count == 0)
                {
                    break;
                }

                // These values depend only on the term, so resolve them once per term
                // instead of once per word of every file.
                string        term        = searchTerms[i];
                string        stemmedTerm = stemmer.StemWord(term);
                List <string> synonyms    = synonymsOn ? db.GetSynonyms(term) : null;

                List <string> fileHasTerm = new List <string>();

                foreach (string file in searchFileList)
                {
                    foreach (string word in ReadFromFile.GetWords(file))
                    {
                        // Synonyms are matched on the exact word; the term itself is matched on its stem.
                        bool matchesSynonym = synonyms != null && synonyms.Contains(word);

                        if (matchesSynonym || stemmer.StemWord(word).Equals(stemmedTerm))
                        {
                            fileHasTerm.Add(file);
                            break; // One matching word is enough to keep the file.
                        }
                    }
                }

                searchFileList = fileHasTerm;
            }

            return(searchFileList);
        }
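        /// <summary>
        /// Returns an array of the stemmed collection. Words of one or two characters are left out.
        /// </summary>
        /// <param name="folder">The folder passed to GetWordCollection to obtain the words</param>
        /// <returns>The stems of all collection words longer than two characters</returns>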
        public string[] GetStemmedCollection(string folder)
        {
            // The stemmer member is replaced before the words are fetched.
            stemmer = new PorterStemmer();

            string[]      words   = GetWordCollection(folder);
            List <string> stemmed = new List <string>();

            for (int index = 0; index < words.Length; index++)
            {
                string candidate = words[index];

                // Words of one or two characters are not stemmed and not returned.
                if (candidate.Length <= 2)
                {
                    continue;
                }

                string stem = stemmer.StemWord(candidate);
                stemmed.Add(stem);
            }

            string[] result = stemmed.ToArray();
            return(result);
        }
Esempio n. 3
0
        ///<summary>Searches the inverted index for the given query terms and returns the files</summary>
        ///<param name="querys">The query list</param>
        ///<returns>A List of files</returns>
        public List <string> GetFilesFromIndex(string[] querys)
        {
            stemmer = new PorterStemmer();

            // One dictionary per query term, in query order, mapping the path returned
            // by converter.GetPath to the value stored in the index for that file.
            Dictionary <string, double>[] lists = new Dictionary <string, double> [querys.Length];

            for (int q = 0; q < querys.Length; q++)
            {
                string stemmedQuery = stemmer.StemWord(querys[q]);

                Dictionary <string, double> scores = new Dictionary <string, double>();
                lists[q] = scores;

                // A term that is not in the index keeps its empty dictionary.
                if (!internalIndex.ContainsKey(stemmedQuery))
                {
                    continue;
                }

                foreach (var posting in internalIndex[stemmedQuery])
                {
                    int fileID = posting.Key;
                    scores.Add(converter.GetPath(fileID), posting.Value);
                }
            }

            // Fold the dictionaries together from the last term to the first: each step keeps
            // only the paths present in both dictionaries and adds their values, so lists[0]
            // ends up with the paths common to all terms. With a single term the loop does not run.
            // NOTE(review): the entries after lists[0] still hold partial results when the
            // whole array is passed on below; confirm ListOrderByDescending expects that.
            for (int i = querys.Length - 1; i > 0; i--)
            {
                Dictionary <string, double> current  = lists[i];
                Dictionary <string, double> previous = lists[i - 1];

                lists[i - 1] = current
                               .Join(previous,
                                     left => left.Key,
                                     right => right.Key,
                                     (left, right) => new { left.Key, Value = left.Value + right.Value })
                               .ToDictionary(pair => pair.Key, pair => pair.Value);
            }

            return(ListOrderByDescending(lists));
        }