/*
  * Recursive method that searches over the directory structure
  * to find the files. Then reads the contents of each file line by line, and
  * calls TermFreq.AddToCollectionFreq() for each word.
  * Calls the appropriate FileMatch method depending on whether
  * the search is being done with the database or without;
  * if true is returned, writeToOutput() is called for that file.
  * @param string dir, Boolean useDataBase, string[] searchTerms,
  * @param List<HashSet<string>> newSearch
  */
 public void SearchDir(string dir, Boolean useDataBase, string[] searchTerms, List <HashSet <string> > newSearch)
 {
     try                                                  //handles reader errors and non-existing file paths.
     {
         foreach (string file in Directory.GetFiles(dir)) //iterates over files in the current directory
         {
             //Every word of this file is collected here so the match below sees the
             //whole file, not just its last line (or a previous file's words when
             //this file is empty).
             List <string> fileWords = new List <string>();

             //using guarantees the file handle is released even if reading throws.
             using (TextReader tr = new StreamReader(file))
             {
                 String line;                                        //holds the current line from the reader
                 while ((line = tr.ReadLine()) != null)              //null marks end of file
                 {
                     String aLine = Regex.Replace(line, @"[^\w\s]", ""); //removes punctuation
                     foreach (string token in aLine.Split(' '))      //splits on single spaces; may yield "" entries
                     {
                         if (token != "")                            //skips the empty entries produced by repeated spaces
                         {
                             String word = token.ToLower();          //words are counted and matched in lowercase
                             TermFreq.AddToCollectionFreq(word);     //adds the word to the collection frequency count
                             fileWords.Add(word);
                         }
                     }
                 }
             }

             words = fileWords.ToArray();                      //all lowercased, non-empty words of this file

             if (useDataBase)                                  //database search: match against newSearch
             {
                 if (Search.FileMatchTermDb(words, newSearch))
                 {
                     writeToOutput(file);                      //records the file path as a hit
                 }
             }
             else                                              //plain search: match against searchTerms
             {
                 if (Search.FileMatchTerm(words, searchTerms))
                 {
                     writeToOutput(file);                      //records the file path as a hit
                 }
             }
         }
         foreach (string dirs in Directory.GetDirectories(dir))    //iterates through sub-directories.
         {
             SearchDir(dirs, useDataBase, searchTerms, newSearch); //recurses so files in the next directory are searched.
         }
     }
     catch (Exception err) //any failure in this directory is shown to the user instead of propagating
     {
         MessageBox.Show("" + err);
     }
 }
Example #2
0
        /*
         * Populates form with filepaths, most common word,
         * query term frequency and files found.
         * Fires resets for collectionFreq and output,
         * so they are new objects for next search.
         * @param Boolean useDatabase.
         */
        private void fileToForm(Boolean useDatabase)
        {
            //Number of result entries, read once through the output getter.
            int foundCount = fileUtil.getOutput.Count;

            if (foundCount != 0)
            {
                //One list box row per entry in the output collection.
                foreach (string path in fileUtil.getOutput)
                {
                    FilePathOutput.Items.Add(path);
                }
            }
            else
            {
                //Nothing matched: show a placeholder row instead.
                FilePathOutput.Items.Add("No files found");
            }

            //Summary fields: hit count, then the two TermFreq results.
            Found.Text         = foundCount.ToString();
            commonWord.Text    = TermFreq.CollectionFreq();
            QueryTermFreq.Text = TermFreq.QueryTermFreq(searchTerms);

            //Reset the output and the frequency data once the form has been filled.
            fileUtil.ResetOutput();
            TermFreq.ResetFreq();
        }
Example #3
0
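        /*
         * Strips punctuation from a line, splits it at spaces into a
         * lower-cased word array, and passes words to
         * TermFreq.AddToCollectionFreq().
         * @param string line (the contents of file)
         * @param string file (filepath; not used inside the method)
         * @return string[] the words of the line
         */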
        private static string[] SetArray(string line, string file)
        {
            //Words of this length or longer are left out of the frequency count.
            const int MaxWordLength = 50;

            String trimLine = line.Trim();                               //trims whitespace from start and end of line.
            String aLine    = Regex.Replace(trimLine, @"[^\w\s _]", ""); //removes punctuation, and replaces it with "".

            string[] words = aLine.Split(' ');                           //split at single spaces; repeated spaces yield "" entries.
            for (int i = 0; i < words.Length; i++)
            {
                //Every entry is lowercased so the returned array is fully lowercase.
                words[i] = words[i].ToLower();

                //Both conditions must hold. The previous "||" was always true, so
                //empty entries and over-long words were being counted as well.
                if (words[i] != "" && words[i].Length < MaxWordLength)
                {
                    TermFreq.AddToCollectionFreq(words[i]); //adds the word to the collection frequency count.
                }
            }
            return words; //the caller receives every entry, including "" and long words.
        }
Example #4
0
        /*
         * Populates form with filepaths, most common word,
         * query term frequency and files found.
         * The results are read from index.WriteToOutput().
         * This method takes no parameters and performs no resets itself.
         */
        private void FileToForm()
        {
            //Results of the search; only the keys are listed on the form.
            Dictionary <string, int> output = index.WriteToOutput();

            if (output.Count == 0) //handles no files being found.
            {
                FilePathOutput.Items.Add("No files found");
            }

            foreach (string path in output.Keys) //no iterations when output is empty.
            {
                FilePathOutput.Items.Add(path);  //adds each key to the list box.
            }

            Found.Text         = output.Count.ToString();             //number of entries in output.
            commonWord.Text    = TermFreq.CollectionFreq();           //value returned by CollectionFreq().
            QueryTermFreq.Text = TermFreq.QueryTermFreq(searchTerms); //value returned by QueryTermFreq().
            //The unused debugging local that read InvertedIndex.invertedIndex.Keys was removed.
        }
 /*
  * Resets data structures for rebuild
  */
 public void ResetInvertedIndex()
 {
     // The index and the frequency data are reset together before a rebuild.
     invertedIndex.Clear(); // removes every entry from invertedIndex
     TermFreq.ResetFreq();  // resets the frequency data held by TermFreq (implementation not visible here)
 }