/// <summary>
/// Builds a Lucene index from the document collection at <paramref name="sourcePath"/>,
/// writing the index to <paramref name="indexPath"/>, and reports the elapsed time
/// in a message box.
/// </summary>
/// <param name="sourcePath">Path to the source document collection to index.</param>
/// <param name="indexPath">Path at which the Lucene index is created.</param>
public static void BuildIndex_Click(string sourcePath, string indexPath)
{
    myLuceneApp = new LuceneSearcheEngine();   // Initiate LuceneSearchEngine object
    myStemmer = new PorterStemmer();           // Initiate PorterStemmer object
    //thesaurus = myLuceneApp.CreateThesaurus(); // Get thesaurus dictionary

    // Stopwatch is monotonic and intended for duration measurement;
    // subtracting two DateTime.Now samples is vulnerable to clock adjustments.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    myLuceneApp.CreateIndex(indexPath);        // Create index at the given path
    System.Console.WriteLine("Adding Documents to Index");
    myLuceneApp.IndexText(sourcePath);         // Add file collection to the index one by one
    System.Console.WriteLine("All documents added.");
    myLuceneApp.CleanUpIndexer();              // Clean up indexer (commits and releases the writer)

    timer.Stop();                              // Indexing time ends

    MessageBox.Show(
        "The time for indexing text was " + timer.Elapsed.TotalMilliseconds + " milliseconds",
        "Reporting Indexing Time",
        MessageBoxButtons.OK,
        MessageBoxIcon.Information);
}
/// <summary>
/// Lower-cases and tokenises <paramref name="text"/>, drops stopwords and tokens
/// of length &lt;= 2, expands each surviving token via
/// <see cref="GetWeightedExpandedQuery"/>, and returns the expanded terms each
/// wrapped as "( term) " with the trailing whitespace trimmed.
/// </summary>
/// <param name="myStemmer">Stemmer instance; currently unused (the stemming call
/// is commented out in favour of query expansion) but kept for interface
/// compatibility with existing callers.</param>
/// <param name="text">Raw query/document text to pre-process.</param>
/// <returns>The processed, expansion-wrapped token string.</returns>
public string PreProcess(PorterStemmer myStemmer, string text)
{
    // Token delimiters: whitespace, quotes (straight and curly), punctuation.
    char[] splits = new char[] { ' ', '\t', '\'', '"', '-', '(', ')', ',', '’', '\n', '\r', ':', ';', '?', '.', '!' };

    string[] tokens = text.ToLower().Split(splits, StringSplitOptions.RemoveEmptyEntries); // Tokenisation

    // StringBuilder avoids the O(n^2) cost of repeated string concatenation in a loop.
    var processed = new System.Text.StringBuilder();
    foreach (string t in tokens) // Looping through each token
    {
        // Keep only non-stopword tokens longer than 2 characters.
        if ((!stopWords.Contains(t)) && (t.Length > 2))
        {
            //string tempt = myStemmer.stemTerm(t);
            string tempt = GetWeightedExpandedQuery(t); // Call query expansion on tokens
            processed.Append("( ").Append(tempt).Append(") "); // Add spaces between words
        }
    }

    string processedText = processed.ToString();
    Console.WriteLine(processedText);
    return processedText.TrimEnd(); // Trim trailing spaces
}