/*
 * Recursive method that searches over the directory structure rooted at dir.
 * For every file, each line is stripped of punctuation, split at spaces and
 * lower-cased; every non-empty word is passed to TermFreq.AddToCollectionFreq().
 * The words of the WHOLE file are then given to Search.FileMatchTermDb()
 * (useDataBase true) or Search.FileMatchTerm() (useDataBase false); when the
 * matcher returns true, writeToOutput() is called with the file path.
 * Any exception raised while a directory is being processed is shown in a
 * message box and ends the work on that directory (its remaining files and
 * sub-directories are skipped).
 * @params string dir, Boolean useDataBase, string[] searchTerms,
 * @params List<HashSet<string>> newSearch
 */
public void SearchDir(string dir, Boolean useDataBase, string[] searchTerms, List<HashSet<string>> newSearch)
{
    try //handles reader errors and non-existing file paths.
    {
        foreach (string file in Directory.GetFiles(dir)) //iterates over files in the current directory
        {
            // Tokens of every line in this file. Matching used to see only the
            // tokens of the last line read (and an empty file silently reused
            // the previous file's tokens), so terms on earlier lines were missed.
            List<string> fileWords = new List<string>();

            // using guarantees the file handle is released, even when reading throws.
            using (TextReader tr = new StreamReader(file))
            {
                String line; //holds the current line from the reader
                while ((line = tr.ReadLine()) != null)
                {
                    String aLine = Regex.Replace(line, @"[^\w\s]", ""); //removes punctuation
                    string[] lineWords = aLine.Split(' '); //splits the line at spaces
                    for (int i = 0; i < lineWords.Length; i++)
                    {
                        if (lineWords[i] != "") //empty tokens are not counted
                        {
                            lineWords[i] = lineWords[i].ToLower();
                            TermFreq.AddToCollectionFreq(lineWords[i]); //adds the word to the collection frequency
                        }
                    }
                    fileWords.AddRange(lineWords);
                }
            }

            // words is a member declared outside this method; it is still
            // assigned so anything else reading it keeps working.
            words = fileWords.ToArray();

            if (useDataBase) //per the original comments: "include synonym" is checked
            {
                if (Search.FileMatchTermDb(words, newSearch))
                {
                    writeToOutput(file); //records the file path as a hit
                }
            }
            else //"include synonym" is unchecked
            {
                if (Search.FileMatchTerm(words, searchTerms))
                {
                    writeToOutput(file); //records the file path as a hit
                }
            }
        }

        foreach (string dirs in Directory.GetDirectories(dir)) //iterates through sub-directories
        {
            SearchDir(dirs, useDataBase, searchTerms, newSearch); //recurses so the next level is searched
        }
    }
    catch (Exception err) //reports reader / file system errors to the user
    {
        MessageBox.Show("" + err);
    }
}
/*
 * Shows the result of a finished search on the form:
 * the matching file paths (or "No files found"), the number of files found,
 * the value returned by TermFreq.CollectionFreq() and the value returned by
 * TermFreq.QueryTermFreq(searchTerms).
 * Afterwards calls fileUtil.ResetOutput() and TermFreq.ResetFreq() so the
 * next search starts from fresh state.
 * @param Boolean useDatabase (not read by this method).
 */
private void fileToForm(Boolean useDatabase)
{
    int foundCount = fileUtil.getOutput.Count;

    if (foundCount == 0)
    {
        // Nothing matched: give the user an explicit message instead of an empty list.
        FilePathOutput.Items.Add("No files found");
    }

    foreach (string path in fileUtil.getOutput)
    {
        FilePathOutput.Items.Add(path);
    }

    Found.Text = foundCount.ToString();
    commonWord.Text = TermFreq.CollectionFreq();
    QueryTermFreq.Text = TermFreq.QueryTermFreq(searchTerms);

    // The statistics above must be read before the resets below.
    fileUtil.ResetOutput();
    TermFreq.ResetFreq();
}
/*
 * Turns a piece of text into an array of lower-cased tokens.
 * The text is trimmed, every character matched by [^\w\s _] is removed,
 * and the result is split at single spaces. Every token is lower-cased in
 * the returned array; tokens that are non-empty and shorter than 50
 * characters are also passed to TermFreq.AddToCollectionFreq().
 * @param string line (the contents of file); null yields an empty array
 * @param string file (filepath) - not read by this method
 * @return string[] the tokens, in order, including any empty tokens
 *         produced by consecutive spaces
 */
private static string[] SetArray(string line, string file)
{
    const int maxCountedWordLength = 50; //tokens this long or longer are not counted

    if (line == null) //nothing to tokenise
    {
        return new string[0];
    }

    String trimLine = line.Trim(); //trims whitespace from start and end of line
    String aLine = Regex.Replace(trimLine, @"[^\w\s _]", ""); //removes punctuation
    string[] words = aLine.Split(' '); //splits the text at spaces

    for (int i = 0; i < words.Length; i++)
    {
        // Lower-casing is applied to every token, exactly as before, so the
        // returned array is unchanged.
        words[i] = words[i].ToLower();

        // The original test used || and was therefore always true, which
        // counted empty tokens and over-long tokens. Both conditions must hold.
        if (words[i] != "" && words[i].Length < maxCountedWordLength)
        {
            TermFreq.AddToCollectionFreq(words[i]); //adds the word to the collection frequency
        }
    }

    return words;
}
/*
 * Populates the form from the result of index.WriteToOutput():
 * the keys of the returned dictionary are listed in FilePathOutput
 * (or "No files found" when it is empty), Found shows the number of entries,
 * commonWord shows the value returned by TermFreq.CollectionFreq() and
 * QueryTermFreq shows the value returned by TermFreq.QueryTermFreq(searchTerms).
 * This method takes no parameters and does not reset any search state.
 */
private void FileToForm()
{
    Dictionary<string, int> output = index.WriteToOutput();

    if (output.Count == 0) //handles no files being found.
    {
        FilePathOutput.Items.Add("No files found");
    }

    foreach (string s in output.Keys) //one list box entry per key
    {
        FilePathOutput.Items.Add(s);
    }

    Found.Text = output.Count.ToString();
    commonWord.Text = TermFreq.CollectionFreq();
    QueryTermFreq.Text = TermFreq.QueryTermFreq(searchTerms);
}
/*
 * Prepares for a rebuild: empties invertedIndex and then asks TermFreq
 * to reset its frequency data.
 */
public void ResetInvertedIndex()
{
    // Drop every entry currently held in the index.
    invertedIndex.Clear();

    // Frequencies are reset along with the index.
    TermFreq.ResetFreq();
}