/// <summary>
/// Narrows a list of files down to the ones that contain every search term.
/// </summary>
/// <remarks>
/// Terms are applied one after another: only the files that matched the previous
/// term are scanned for the next one. A file matches a term when at least one of its
/// words has the same stem as the term or, when <paramref name="synonymsOn"/> is set,
/// is equal to one of the synonyms the database returns for the term.
/// This method replaces the <c>db</c> and <c>stemmer</c> fields with fresh instances.
/// </remarks>
/// <param name="files">The files to scan.</param>
/// <param name="searchTerms">The terms every returned file must contain.</param>
/// <param name="synonymsOn">Whether a word equal to a synonym of a term also counts as a match.</param>
/// <param name="dataSet">The data set the synonym database is built on.</param>
/// <returns>
/// The files that matched all terms, in their original relative order. When
/// <paramref name="searchTerms"/> is empty, <paramref name="files"/> itself is returned.
/// </returns>
private List<string> ScanFilesByTerms(List<string> files, string[] searchTerms, Boolean synonymsOn, NewWordsDataSet dataSet)
{
    db = new Database(dataSet);
    stemmer = new PorterStemmer();

    List<string> searchFileList = files;
    for (int i = 0; i < searchTerms.Length; i++)
    {
        string term = searchTerms[i];

        // Both values depend only on the term, so compute them once per term
        // instead of once per word of every file.
        string stemmedTerm = stemmer.StemWord(term);
        List<string> synonyms = synonymsOn ? db.GetSynonyms(term) : null;

        List<string> fileHasTerm = new List<string>();
        foreach (string file in searchFileList)
        {
            foreach (string word in ReadFromFile.GetWords(file))
            {
                bool isSynonym = synonyms != null && synonyms.Contains(word);
                if (isSynonym || stemmer.StemWord(word).Equals(stemmedTerm))
                {
                    fileHasTerm.Add(file);
                    // One matching word is enough; the rest of the file cannot change the outcome.
                    break;
                }
            }
        }

        // Only files that contained this term remain candidates for the next term.
        searchFileList = fileHasTerm;
    }

    return searchFileList;
}
/// <summary>
/// Produces the stemmed form of the word collection obtained for a folder.
/// </summary>
/// <remarks>
/// Replaces the <c>stemmer</c> field with a fresh <c>PorterStemmer</c> before stemming.
/// </remarks>
/// <param name="folder">Forwarded to <c>GetWordCollection</c> to obtain the words.</param>
/// <returns>
/// The stem of every word longer than two characters, in the order the words were
/// returned by <c>GetWordCollection</c>. Shorter words are left out.
/// </returns>
public string[] GetStemmedCollection(string folder)
{
    stemmer = new PorterStemmer();

    string[] words = GetWordCollection(folder);
    List<string> stems = new List<string>();

    for (int index = 0; index < words.Length; index++)
    {
        string candidate = words[index];

        // Words of one or two characters are skipped without being stemmed.
        if (candidate.Length <= 2)
        {
            continue;
        }

        stems.Add(stemmer.StemWord(candidate));
    }

    return stems.ToArray();
}
/// <summary>
/// Looks up each query term in the inverted index and combines the per-term hits.
/// </summary>
/// <remarks>
/// Each term is stemmed and looked up in <c>internalIndex</c>; its postings are stored as a
/// dictionary from file path (resolved through <c>converter.GetPath</c>) to the indexed value.
/// A term that is not in the index yields an empty dictionary.
/// The dictionaries are then folded from the last term towards the first: slot <c>i - 1</c> is
/// replaced by the paths present in both slot <c>i</c> and slot <c>i - 1</c>, with their values
/// added together. Afterwards slot 0 holds the paths found for every term; the later slots
/// still hold their partial results. The whole array is handed to <c>ListOrderByDescending</c>.
/// Replaces the <c>stemmer</c> field with a fresh <c>PorterStemmer</c>.
/// </remarks>
/// <param name="querys">The query terms to search for.</param>
/// <returns>The list of files produced by <c>ListOrderByDescending</c> from the combined hits.</returns>
public List<string> GetFilesFromIndex(string[] querys)
{
    stemmer = new PorterStemmer();

    Dictionary<string, double>[] lists = new Dictionary<string, double>[querys.Length];
    for (int q = 0; q < querys.Length; q++)
    {
        string stemmedQuery = stemmer.StemWord(querys[q]);
        lists[q] = new Dictionary<string, double>();

        if (internalIndex.ContainsKey(stemmedQuery))
        {
            foreach (var posting in internalIndex[stemmedQuery])
            {
                int fileID = posting.Key;
                lists[q].Add(converter.GetPath(fileID), posting.Value);
            }
        }
    }

    // Fold right-to-left; with zero or one term the loop body never runs.
    for (int i = querys.Length - 1; i > 0; i--)
    {
        var dict = lists[i];
        var nextDict = lists[i - 1];

        // Keep only paths present in both dictionaries and add their values.
        var joined = from kvp1 in dict
                     join kvp2 in nextDict on kvp1.Key equals kvp2.Key
                     select new { kvp1.Key, Value = kvp1.Value + kvp2.Value };

        lists[i - 1] = joined.ToDictionary(t => t.Key, t => t.Value);
    }

    return ListOrderByDescending(lists);
}