Esempio n. 1
0
        /// <summary>
        /// Creates an index at <paramref name="indexPath"/>, feeds it the content found at
        /// <paramref name="sourcePath"/>, and reports the elapsed indexing time in a message box.
        /// </summary>
        /// <param name="sourcePath">Location of the document collection; passed to <c>IndexText</c>.</param>
        /// <param name="indexPath">Location of the index; passed to <c>CreateIndex</c>.</param>
        public static void BuildIndex_Click(string sourcePath, string indexPath)
        {
            myLuceneApp = new LuceneSearcheEngine();    // Fresh search-engine wrapper for this run
            myStemmer   = new PorterStemmer();          // Fresh stemmer, stored for later use by other members

            // Stopwatch is monotonic and high-resolution; subtracting two DateTime.Now
            // readings is coarse and is skewed by wall-clock / DST adjustments.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            myLuceneApp.CreateIndex(indexPath);
            try
            {
                System.Console.WriteLine("Adding Documents to Index");
                myLuceneApp.IndexText(sourcePath);
                System.Console.WriteLine("All documents added.");
            }
            finally
            {
                // Always release the indexer, even when adding documents fails part-way.
                // NOTE(review): assumes CleanUpIndexer is safe to call after a failed IndexText - confirm.
                myLuceneApp.CleanUpIndexer();
            }

            timer.Stop();

            MessageBox.Show("The time for indexing text was " + timer.Elapsed.TotalMilliseconds + " milliseconds", "Reporting Indexing Time", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
Esempio n. 2
0
        /// <summary>
        /// Tokenises <paramref name="text"/>, drops stop words and tokens of two characters or fewer,
        /// expands each remaining token via <c>GetWeightedExpandedQuery</c>, and joins the results
        /// as space-separated groups of the form <c>( expansion)</c>.
        /// </summary>
        /// <param name="myStemmer">Currently unused; kept so existing callers continue to compile.</param>
        /// <param name="text">Raw input text. <c>null</c> is treated as empty input.</param>
        /// <returns>The processed text without trailing whitespace; empty when no token survives filtering.</returns>
        public string PreProcess(PorterStemmer myStemmer, string text)
        {
            // Token delimiters: whitespace, quotes, brackets and common punctuation.
            char[] splits = new char[] { ' ', '\t', '\'', '"', '-', '(', ')', ',', '’', '\n', '\r', ':', ';', '?', '.', '!' };

            // Invariant lower-casing keeps stop-word matching independent of the machine's culture
            // (e.g. Turkish casing of 'I').
            string[] tokens = (text ?? string.Empty)
                .ToLowerInvariant()
                .Split(splits, StringSplitOptions.RemoveEmptyEntries);

            // StringBuilder avoids re-copying the whole accumulated string for every token.
            System.Text.StringBuilder processedText = new System.Text.StringBuilder();

            foreach (string t in tokens)
            {
                // Skip very short tokens and stop words.
                if (t.Length > 2 && !stopWords.Contains(t))
                {
                    string expanded = GetWeightedExpandedQuery(t); // Query expansion for this token
                    processedText.Append("( ").Append(expanded).Append(") ");
                }
            }

            string result = processedText.ToString();
            Console.WriteLine(result);
            return result.TrimEnd(); // Drop the trailing separator space
        }