/// <summary>
/// Lower-cases and tokenises <paramref name="text"/>, discards stop words and
/// tokens of two characters or fewer, and passes every remaining token through
/// <c>GetWeightedExpandedQuery</c>.
/// </summary>
/// <param name="myStemmer">
/// Not used by this method; kept in the signature so existing callers keep compiling.
/// </param>
/// <param name="text">Raw text to process. A null or empty value yields an empty string.</param>
/// <returns>
/// The expanded tokens, each separated by a space, with trailing whitespace removed.
/// </returns>
public string PreProcess(PorterStemmer myStemmer, string text)
{
    // Nothing to tokenise; also avoids a NullReferenceException on text.ToLower().
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    // Delimiters used to break the text into tokens.
    char[] splits = new char[] { ' ', '\t', '\'', '"', '-', '(', ')', ',', '’', '\n', ':', ';', '?', '.', '!' };
    string[] tokens = text.ToLower().Split(splits, StringSplitOptions.RemoveEmptyEntries);

    // A StringBuilder avoids re-allocating the whole result string for every token.
    var processedText = new System.Text.StringBuilder();
    foreach (string t in tokens)
    {
        // NOTE(review): stopWords is declared elsewhere; if it is a plain string this is a
        // substring test rather than a whole-word lookup - confirm the intended semantics.
        if ((!stopWords.Contains(t)) && (t.Length > 2))
        {
            string expanded = GetWeightedExpandedQuery(t); // Query expansion for this token
            processedText.Append(expanded).Append(' ');
        }
    }

    // Drop the separator appended after the final token.
    return processedText.ToString().TrimEnd();
}
public static PorterStemmer newStemmer; // Publicly accessible PorterStemmer, re-created on every index build

/// <summary>
/// Builds the search index at <paramref name="indexPath"/> from the collection at
/// <paramref name="sourcePath"/> and logs how long the build took.
/// </summary>
/// <param name="sourcePath">Path of the document collection to index; must not be null or blank.</param>
/// <param name="indexPath">Directory in which the index is created; must not be null or blank.</param>
/// <remarks>
/// The shared search engine and stemmer are re-created before the paths are validated.
/// When a path is missing, an error dialog is shown and nothing is indexed.
/// </remarks>
public static void BuildIndex_Click(string sourcePath, string indexPath)
{
    eduSearchApp = new MainSearchEngine(); // Fresh search engine for this build
    newStemmer = new PorterStemmer();      // Fresh stemmer for this build

    // Guard clauses: a blank path is as unusable as a null one.
    if (string.IsNullOrWhiteSpace(sourcePath))
    {
        MessageBox.Show("Please add collection path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    if (string.IsNullOrWhiteSpace(indexPath))
    {
        MessageBox.Show("Please add the index directory path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Stopwatch is monotonic, so the measurement is unaffected by system clock changes.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    eduSearchApp.CreateIndex(indexPath);             // Create index at the given path
    Program.AddLog("Adding documents to the Index"); // Logged once, only when indexing actually starts
    eduSearchApp.IndexText(sourcePath);              // Add the file collection to the index
    eduSearchApp.CleanUpIndexer();                   // Clean up the indexer once all documents are added

    timer.Stop();
    Program.AddLog("The time for indexing text was " + timer.Elapsed);
}
public static PorterStemmer newStemmer; // Publicly accessible PorterStemmer, re-created on every index build

/// <summary>
/// Builds the search index at <paramref name="indexPath"/> from the collection at
/// <paramref name="sourcePath"/> and publishes the elapsed time, in seconds, to
/// <c>MainSearchBox.timetoIndex</c>.
/// </summary>
/// <param name="sourcePath">Path of the document collection to index; must not be null or blank.</param>
/// <param name="indexPath">Directory in which the index is created; must not be null or blank.</param>
/// <param name="timetoIndex">
/// Ignored. The parameter is passed by value, so the caller never sees a value written to it;
/// the result is delivered through <c>MainSearchBox.timetoIndex</c> instead.
/// </param>
/// <remarks>
/// The shared search engine and stemmer are re-created before the paths are validated.
/// When a path is missing, an error dialog is shown and nothing is indexed.
/// </remarks>
public static void BuildIndex_Click(string sourcePath, string indexPath, string timetoIndex)
{
    eduSearchApp = new MainSearchEngine(); // Fresh search engine for this build
    newStemmer = new PorterStemmer();      // Fresh stemmer for this build

    // Guard clauses: a blank path is as unusable as a null one.
    if (string.IsNullOrWhiteSpace(sourcePath))
    {
        MessageBox.Show("Please add collection path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    if (string.IsNullOrWhiteSpace(indexPath))
    {
        MessageBox.Show("Please add the index directory path properly", "Error!!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Stopwatch is monotonic, so the measurement is unaffected by system clock changes.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    eduSearchApp.CreateIndex(indexPath); // Create index at the given path
    eduSearchApp.IndexText(sourcePath);  // Add the file collection to the index
    eduSearchApp.CleanUpIndexer();       // Clean up the indexer once all documents are added

    timer.Stop();

    // TotalSeconds covers the whole duration. TimeSpan.Milliseconds is only the 0-999
    // millisecond component, so it under-reports any build that takes a second or more.
    double timeIn = timer.Elapsed.TotalSeconds;
    MainSearchBox.timetoIndex = timeIn.ToString(); // Shown on screen below the index panel
}