/// <summary>
        /// Tokenises <paramref name="text"/>, discards stop words and tokens of two
        /// characters or fewer, and passes every remaining token through
        /// <c>GetWeightedExpandedQuery</c>. The results are joined with single spaces.
        /// </summary>
        /// <param name="myStemmer">Not used by this method; kept so existing callers still compile.</param>
        /// <param name="text">Raw text to process. Null or empty input yields an empty string.</param>
        /// <returns>The space-separated processed tokens, with trailing whitespace trimmed.</returns>
        public string PreProcess(PorterStemmer myStemmer, string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            // '\r' is included so Windows line endings do not leave a carriage return glued to a token.
            char[] delimiters = { ' ', '\t', '\r', '\n', '\'', '"', '-', '(', ')', ',', '’', ':', ';', '?', '.', '!' };

            // Invariant lower-casing keeps tokenisation independent of the machine's current culture.
            string[] tokens = text.ToLowerInvariant().Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

            // A builder avoids re-copying the whole result string for every token.
            var processed = new System.Text.StringBuilder();

            foreach (string token in tokens)
            {
                // NOTE(review): stopWords is declared outside this block; if it is a plain string,
                // Contains is a substring test rather than a whole-word lookup - confirm.
                if (token.Length <= 2 || stopWords.Contains(token))
                {
                    continue;
                }

                string expanded = GetWeightedExpandedQuery(token); // Query expansion for the surviving token
                processed.Append(expanded).Append(' ');
            }

            return processed.ToString().TrimEnd(); // Drop the separator left after the last token
        }
Example #2
0
        /// <summary>
        /// Shared PorterStemmer instance; replaced with a new object each time BuildIndex_Click runs.
        /// </summary>
        public static PorterStemmer newStemmer;           // Public static PorterStemmer field



        /// <summary>
        /// Builds the index: creates it at <paramref name="indexPath"/>, adds the collection found at
        /// <paramref name="sourcePath"/>, cleans up the indexer and logs the elapsed time.
        /// When either path is missing an error dialog is shown and nothing is indexed.
        /// </summary>
        /// <param name="sourcePath">Path of the document collection to index.</param>
        /// <param name="indexPath">Directory in which the index is created.</param>
        public static void BuildIndex_Click(string sourcePath, string indexPath)
        {
            // The shared engine and stemmer are re-created on every call, valid input or not.
            eduSearchApp = new MainSearchEngine();
            newStemmer   = new PorterStemmer();

            // Empty or whitespace-only paths are rejected the same way as null ones.
            if (string.IsNullOrWhiteSpace(sourcePath))
            {
                MessageBox.Show("Please add collection path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            if (string.IsNullOrWhiteSpace(indexPath))
            {
                MessageBox.Show("Please add the index directory path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Stopwatch is monotonic, so the measurement is unaffected by wall-clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            eduSearchApp.CreateIndex(indexPath);

            // Logged once, and only after validation has passed.
            Program.AddLog("Adding documents to the Index");

            eduSearchApp.IndexText(sourcePath);
            eduSearchApp.CleanUpIndexer();

            timer.Stop();
            Program.AddLog("The time for indexing text was " + timer.Elapsed);
        }
Example #3
0
        /// <summary>
        /// Shared PorterStemmer instance; replaced with a new object each time BuildIndex_Click runs.
        /// </summary>
        public static PorterStemmer newStemmer;           // Public static PorterStemmer field

        /// <summary>
        /// Builds the index: creates it at <paramref name="indexPath"/>, adds the collection found at
        /// <paramref name="sourcePath"/>, cleans up the indexer and stores the elapsed time in seconds
        /// in <c>MainSearchBox.timetoIndex</c>.
        /// When either path is missing an error dialog is shown and nothing is indexed.
        /// </summary>
        /// <param name="sourcePath">Path of the document collection to index.</param>
        /// <param name="indexPath">Directory in which the index is created.</param>
        /// <param name="timetoIndex">
        /// Not read. It is passed by value, so the caller cannot receive a result through it;
        /// the elapsed time is written to <c>MainSearchBox.timetoIndex</c> instead.
        /// </param>
        public static void BuildIndex_Click(string sourcePath, string indexPath, string timetoIndex)
        {
            // The shared engine and stemmer are re-created on every call, valid input or not.
            eduSearchApp = new MainSearchEngine();
            newStemmer   = new PorterStemmer();

            // Empty or whitespace-only paths are rejected the same way as null ones.
            if (string.IsNullOrWhiteSpace(sourcePath))
            {
                MessageBox.Show("Please add collection path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            if (string.IsNullOrWhiteSpace(indexPath))
            {
                MessageBox.Show("Please add the index directory path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Stopwatch is monotonic, so the measurement is unaffected by wall-clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            eduSearchApp.CreateIndex(indexPath);
            eduSearchApp.IndexText(sourcePath);
            eduSearchApp.CleanUpIndexer();

            timer.Stop();

            // TotalSeconds covers the whole run; TimeSpan.Milliseconds is only the 0-999 ms component
            // and would misreport any run lasting a second or longer.
            double elapsedSeconds = timer.Elapsed.TotalSeconds;

            MainSearchBox.timetoIndex = elapsedSeconds.ToString(); // Shown on screen below the index panel
        }