/// <summary>
/// Raises the ProgressChanged event with the given percentage and progress details.
/// Does nothing when no handler is subscribed.
/// </summary>
/// <param name="progressPercentage">Progress value passed to the event arguments.</param>
/// <param name="pi">Progress details, passed to the event arguments as the user state.</param>
protected void ReportProgress(int progressPercentage, ProgressInfo pi)
 {
     // Snapshot the delegate first: reading the event twice (null check, then call)
     // lets a handler unsubscribe in between and turn the call into a NullReferenceException.
     var handler = ProgressChanged;
     if (handler != null)
     {
         handler(this, new ProgressChangedEventArgs(progressPercentage, pi));
     }
 }
Example #2
0
        /// <summary>
        /// Creates the index in the specified path, using the corpusReader object
        /// as the documents feed. The index writer is opened in "create" mode.
        /// </summary>
        /// <param name="corpusReader">
        /// Document source. Its OnDocument and OnProgress events are subscribed to here,
        /// and its Read() call drives the whole indexing run.
        /// </param>
        /// <param name="indexPath">Directory in which the index is written.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="corpusReader"/> or <paramref name="indexPath"/> is null.
        /// </exception>
        public void CreateIndex(WikiDumpReader corpusReader, string indexPath)
        {
            // Validate before touching the disk: the writer below is opened with create == true,
            // so failing later on a null reader would already have replaced the index.
            if (corpusReader == null)
                throw new ArgumentNullException("corpusReader");
            if (indexPath == null)
                throw new ArgumentNullException("indexPath");

            cr = corpusReader;

            try
            {
                var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

                writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), analyzer, true,
                                             IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetUseCompoundFile(false);

                // This will be called whenever a document is read by the provided ICorpusReader.
                // NOTE(review): the handlers are never unsubscribed, so calling CreateIndex twice
                // with the same reader would index every document twice - confirm readers are single-use.
                corpusReader.OnDocument += corpusDoc =>
                {
                    if (corpusReader.AbortReading)
                        return;

                    // Run the parse on a separate task so a parser stuck in an infinite loop cannot
                    // hang indexing. Wait returns true only if the task finished within the limit;
                    // on timeout the document is treated as blank (the parse task is not cancelled).
                    var parseTask = Task.Factory.StartNew(() => corpusDoc.AsHtml());
                    var completedInTime = parseTask.Wait(TimeSpan.FromMinutes(2));
                    var content = completedInTime ? parseTask.Result : string.Empty;

                    // skip blank documents, they are worthless to us (even though they have a title we could index)
                    if (string.IsNullOrEmpty(content))
                        return;

                    // Create a new index document
                    var doc = new Document();
                    doc.Add(new Field("Id", corpusDoc.Id, Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));

                    // Add title field, boosted relative to the content field
                    var titleField = new Field("Title", corpusDoc.Title, Field.Store.YES,
                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    titleField.SetBoost(3.0f);
                    doc.Add(titleField);

                    doc.Add(new Field("Content", content, Field.Store.COMPRESS,
                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                    writer.AddDocument(doc);
                };

                // Progress reporting. The reader's isRunning flag is not forwarded: every report
                // from here says "still running", and only the final "Ready" report below clears it.
                corpusReader.OnProgress += (percentage, status, isRunning) =>
                {
                    var pi = new ProgressInfo { IsStillRunning = true, Status = string.Format("{0} ({1}%)", status, percentage) };
                    Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(percentage, pi));
                };

                // Execute corpus reading, which will trigger indexing for each document found
                corpusReader.Read();
                cr = null;

                // Switch back to compound files and merge segments before closing
                writer.SetUseCompoundFile(true);
                writer.Optimize();
            }
            finally
            {
                // Always release the reader reference and close the writer, even when reading or
                // indexing threw; otherwise the index write lock stays held and the fields go stale.
                cr = null;
                if (writer != null)
                {
                    try
                    {
                        writer.Close();
                    }
                    finally
                    {
                        writer = null;
                    }
                }
            }

            var pi1 = new ProgressInfo { IsStillRunning = false, Status = "Ready" };
            Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(100, pi1));
        }
 /// <summary>
 /// Convenience overload: packs the status text and running flag into a
 /// ProgressInfo and forwards it, with the percentage, to the two-argument ReportProgress.
 /// </summary>
 /// <param name="progressPercentage">Progress value to report.</param>
 /// <param name="status">Status text stored in the ProgressInfo.</param>
 /// <param name="isRunning">Value stored in ProgressInfo.IsStillRunning.</param>
 protected void ReportProgress(byte progressPercentage, string status, bool isRunning)
 {
     ProgressInfo info = new ProgressInfo();
     info.Status = status;
     info.IsStillRunning = isRunning;

     ReportProgress(progressPercentage, info);
 }