// Calls Cleanup() on every entry currently held in generated_indexables.
// The collection itself is left untouched; entries are not removed here.
public void CleanGeneratedIndexables()
{
    foreach (Indexable generated in generated_indexables)
    {
        generated.Cleanup();
    }
}
// Releases everything this object holds on to:
//   1. cleans up the filter and the indexables the filter generated,
//   2. cleans up the indexable,
//   3. drops the references to both,
//   4. empties PersistentPropDocs when it is set.
//
// The method is safe to call more than once: after the first call Filter
// and Indexable are null, so the guards below turn later calls into no-ops
// instead of letting them fail with a NullReferenceException.
public void Cleanup()
{
    if (Filter != null)
    {
        Filter.Cleanup();
        Filter.CleanGeneratedIndexables();
    }

    if (Indexable != null)
    {
        Indexable.Cleanup();
    }

    // Drop the references only after both cleanups ran, same order as before.
    Filter = null;
    Indexable = null;

    if (PersistentPropDocs != null)
    {
        PersistentPropDocs.Clear();
    }
}
// Cleanup hook override: forwards to Cleanup() on the indexable member.
protected override void DoCleanup()
{
    indexable.Cleanup();
}
// Prints a report about one indexable to the console: its URI (and parent
// URI when set), the filter chosen for it and how long that took, its mime
// type, the URIs of any filter-generated indexables, its sorted properties
// and finally its text content. When show_generated is set, the generated
// indexables are collected and displayed recursively after the content.
//
// Reads the static flags first_indexable, show_generated, tokenize, analyze
// and stats_only, and clears first_indexable.
static void Display(Indexable indexable)
{
	// Print a separator between consecutive reports, but not before the first one.
	if (!first_indexable) {
		Console.WriteLine ();
		Console.WriteLine ("-----------------------------------------");
		Console.WriteLine ();
	}
	first_indexable = false;

	Console.WriteLine ("Filename: " + indexable.Uri);
	if (indexable.ParentUri != null)
		Console.WriteLine ("Parent: " + indexable.ParentUri);

	Stopwatch watch = new Stopwatch ();

	Filter filter;

	// Time how long it takes to pick (and set up) a filter for the indexable.
	watch.Start ();
	if (! FilterFactory.FilterIndexable (indexable, out filter)) {
		// No usable filter: clean up right away and mark the indexable as
		// having no content, which triggers the early return further down.
		indexable.Cleanup ();
		indexable.NoContent = true;
		filter = null;
	}
	watch.Stop ();

	Console.WriteLine ("Filter: {0} (determined in {1})", filter, watch);
	Console.WriteLine ("MimeType: {0}", indexable.MimeType);
	Console.WriteLine ();

	ArrayList generated_indexables = new ArrayList ();
	Indexable generated_indexable;

	// 'first' is reused several times below as a "nothing printed yet" flag.
	bool first = true;
	if (filter != null && filter.HasGeneratedIndexable) {
		while (filter.GenerateNextIndexable (out generated_indexable)) {
			if (generated_indexable == null)
				continue;

			if (first) {
				Console.WriteLine ("Filter-generated indexables:");
				first = false;
			}

			Console.WriteLine (" {0}", generated_indexable.Uri);

			// Keep the generated indexable for the recursive display at the
			// end of this method, or clean it up immediately if it will not
			// be shown.
			if (show_generated)
				generated_indexables.Add (generated_indexable);
			else
				generated_indexable.Cleanup ();
		}
	}

	// Blank line after the list, only if a list was actually printed.
	if (! first)
		Console.WriteLine ();

	// Make sure that the properties are sorted.
	ArrayList prop_array = new ArrayList (indexable.Properties);
	prop_array.Sort ();

	Console.WriteLine ("Properties:");

	if (indexable.ValidTimestamp)
		Console.WriteLine (" Timestamp = {0}", DateTimeUtil.ToString (indexable.Timestamp));

	foreach (Beagle.Property prop in prop_array) {
		// Properties with a null or empty value are not shown.
		if (String.IsNullOrEmpty (prop.Value))
			continue;

		Console.WriteLine (" {0} = {1}", prop.Key, prop.Value);
	}

	Console.WriteLine ();

	// NOTE(review): this return happens before the generated indexables
	// collected above are displayed or cleaned up and before the final
	// indexable.Cleanup () call. In the "no filter" path above Cleanup () has
	// already run and the list is empty; whether NoContent can also be true
	// when a filter was found is not visible from here -- confirm.
	if (indexable.NoContent)
		return;

	// From here on the stopwatch measures text extraction.
	watch.Reset ();
	watch.Start ();

	TextReader reader;
	Analyzer indexing_analyzer = new BeagleAnalyzer ();

	// 'buffer' is only referenced by the disabled (#if false) code path and
	// by the commented-out hot-content section below.
	char[] buffer = new char [2048];
	reader = indexable.GetTextReader ();
	// Tokens are written one per line when 'tokenize' is set, otherwise
	// separated by single spaces.
	char separater_char = (tokenize ? '\n' : ' ');
	if (reader != null) {
		first = true;

		if (analyze) {
			// Run the text through the indexing analyzer and print the tokens.
			if (! stats_only)
				Console.WriteLine ("Content:");

			TokenStream token_stream = indexing_analyzer.TokenStream ("Text", reader);
			Lucene.Net.Analysis.Token token = token_stream.Next ();
			// No first token means there is no content at all.
			first = (token == null);

			// NOTE(review): with stats_only set, only the first token is
			// pulled from the stream before it is closed, so the reported
			// extraction time does not cover the whole text -- confirm this
			// is intended.
			if (! stats_only)
				for (; token != null; token = token_stream.Next ())
					Console.Write ("{0}{1}", token.TermText (), separater_char);

			token_stream.Close ();
		} else {
#if false
			while (true) {
				int l = reader.Read (buffer, 0, 2048);
				if (l <= 0)
					break;
				if (first)
					first = false;
				if (! stats_only)
					DisplayContent (buffer, l);
			}
#else
			// Read the text line by line. The "Content:" header is printed
			// on the first line regardless of stats_only; the lines
			// themselves are only shown when stats_only is off.
			string line;
			first = true;
			while ((line = reader.ReadLine ()) != null) {
				if (first) {
					Console.WriteLine ("Content:");
					first = false;
				}
				if (! stats_only)
					DisplayContent (line);
			}
#endif
		}

		reader.Close ();

		if (first)
			Console.WriteLine ("(no content)");
		else
			Console.WriteLine ('\n');
	}

	// Disabled: the same display logic for the hot text reader.
	/*
	reader = indexable.GetHotTextReader ();
	first = true;
	if (reader != null) {
		Console.WriteLine ("HotContent:");

		if (analyze) {
			TokenStream token_stream = indexing_analyzer.TokenStream ("HotText", reader);
			Lucene.Net.Analysis.Token token = token_stream.Next ();
			first = (token == null);
			for (; token != null; token = token_stream.Next ())
				Console.Write ("{0}{1}", token.TermText (), separater_char);
			token_stream.Close ();
		} else {
			while (true) {
				int l = reader.Read (buffer, 0, 2048);
				if (l <= 0)
					break;
				if (first)
					first = false;
				DisplayContent (buffer, l);
			}
		}

		reader.Close ();

		if (first)
			Console.WriteLine ("(no hot content)");
		else
			Console.WriteLine ('\n');
	}
	*/

	watch.Stop ();

	Console.WriteLine ();
	Console.WriteLine ("Text extracted in {0}", watch);

#if ENABLE_RDF_ADAPTER
	// List the links of the indexable, if it has any.
	IList<string> links = indexable.Links;
	if (links != null && links.Count != 0) {
		Console.WriteLine ("Links:");
		foreach (string link in links)
			Console.WriteLine (link);
		Console.WriteLine ();
	}
#endif

	// Recursively display the generated indexables collected earlier (the
	// list is only populated when show_generated is set). Each recursive
	// call cleans up the indexable it is given.
	foreach (Indexable gi in generated_indexables)
		Display (gi);

	// Fetch the binary stream only to close it when one is returned.
	Stream stream = indexable.GetBinaryStream ();
	if (stream != null)
		stream.Close ();

	// Clean up any temporary files associated with filtering this indexable.
	indexable.Cleanup ();
}
// Filters one indexable and, unless its filter may generate further
// indexables, adds it to the index.
//
// Parameters:
//   indexable         - the item to filter and index.
//   primary_writer    - passed through to AddDocumentToIndex.
//   secondary_writer  - passed through (by reference) to AddDocumentToIndex.
//   prop_change_docs  - optional table, keyed by indexable URI, of persistent
//                       property documents; may be null.
//
// Returns the receipt for the added indexable (carrying the filter name and
// version when a filter was used), or null when the indexable was deferred
// because its filter reports HasGeneratedIndexable. A deferred indexable is
// queued at the front of deferred_indexables and is NOT cleaned up here.
//
// FileFilterNotifier, when set, is told which item is being filtered, then
// which filter was picked, and is reset with nulls before returning.
private IndexerAddedReceipt AddIndexableToIndex(Indexable indexable, IndexWriter primary_writer, ref IndexWriter secondary_writer, Hashtable prop_change_docs)
{
    Filter filter = null;

    if (FileFilterNotifier != null) {
        FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.
    }

    // If we have content, try to find a filter
    // we can use to process the indexable
    bool filter_content = false;
    try {
        filter_content = FilterFactory.FilterIndexable(indexable, (disable_textcache ? null : text_cache), out filter);
    } catch (System.Exception e) {
        // Filtering is best-effort: a failing filter must not prevent the
        // item from being indexed (it is indexed without content below),
        // but the failure should leave a trace instead of vanishing.
        Log.Debug("Caught exception while filtering {0}; indexing it without content: {1}", indexable.DisplayUri, e.ToString());
    }

    if (!filter_content) {
        indexable.NoContent = true;
        filter = null;
    }

    if (FileFilterNotifier != null) {
        FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter
    }

    IndexerAddedReceipt r = new IndexerAddedReceipt(indexable.Id);

    if (filter != null) {
        if (filter.HasGeneratedIndexable) {
            Log.Debug("{0} might generate indexables from {1}; deferring until later", indexable.DisplayUri, filter.GetType().ToString());

            // This indexable can potentially generate indexables,
            // so defer its indexing
            DeferredInfo di;
            di = new DeferredInfo(indexable, filter, prop_change_docs);
            deferred_indexables.Insert(0, di);

            // Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
            // FIXME: Make sure all indexable.Cleanup is called for all indexables if
            // shutdown is signalled.
            if (FileFilterNotifier != null) {
                FileFilterNotifier(null, null, null); // reset
            }

            // Return null to signal the indexable was deferred
            return(null);
        }

        // Force the clean-up of temporary files, just in case.
        // FIXME: I am not sure if the cleanup should happen now.
        // What is the difference between filter.Cleanup and Indexable.Cleanup ?
        filter.Cleanup();

        r.FilterName = filter.GetType().ToString();
        r.FilterVersion = filter.Version;
    }

    // If this indexable is not deferred, add it to the index.
    if (indexable.DisplayUri != indexable.ContentUri) {
        Log.Debug("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
    } else {
        Log.Debug("+{0}", indexable.DisplayUri);
    }

    // Look up the persistent property document recorded for this URI, if any.
    Document persistent_prop_doc = null;
    if (prop_change_docs != null) {
        persistent_prop_doc = (Document)prop_change_docs [indexable.Uri];
    }

    AddDocumentToIndex(indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

    if (FileFilterNotifier != null) {
        FileFilterNotifier(null, null, null); // reset
    }

    // Clean up any temporary files associated with filtering this indexable.
    indexable.Cleanup();

    // Remove any existing text cache for this item
    if (disable_textcache && text_cache != null) {
        text_cache.Delete(indexable.Uri);
    }

    return(r);
}
// Filters one indexable and, unless its filter may generate further
// indexables, adds it to the index.
//
// Parameters:
//   indexable         - the item to filter and index.
//   primary_writer    - passed through to AddDocumentToIndex.
//   secondary_writer  - passed through (by reference) to AddDocumentToIndex.
//   prop_change_docs  - optional table, keyed by indexable URI, of persistent
//                       property documents; may be null.
//
// Returns the receipt for the added indexable (carrying the filter name and
// version when a filter was used), or null when the indexable was deferred
// because its filter reports HasGeneratedIndexable. A deferred indexable is
// queued at the front of deferred_indexables and is NOT cleaned up here.
//
// FileFilterNotifier, when set, is told which item is being filtered, then
// which filter was picked, and is reset with nulls before returning.
private IndexerAddedReceipt AddIndexableToIndex (Indexable indexable,
						 IndexWriter primary_writer,
						 ref IndexWriter secondary_writer,
						 Hashtable prop_change_docs)
{
	Filter filter = null;

	if (FileFilterNotifier != null)
		FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.

	// If we have content, try to find a filter
	// we can use to process the indexable
	bool filter_content = false;
	try {
		filter_content = FilterFactory.FilterIndexable (indexable, (disable_textcache ? null : text_cache), out filter);
	} catch (System.Exception e) {
		// Filtering is best-effort: a failing filter must not prevent the
		// item from being indexed (it is indexed without content below),
		// but the failure should leave a trace instead of vanishing.
		Log.Debug ("Caught exception while filtering {0}; indexing it without content: {1}", indexable.DisplayUri, e.ToString ());
	}

	if (! filter_content) {
		indexable.NoContent = true;
		filter = null;
	}

	if (FileFilterNotifier != null)
		FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter

	IndexerAddedReceipt r = new IndexerAddedReceipt (indexable.Id);

	if (filter != null) {
		if (filter.HasGeneratedIndexable) {
			Log.Debug ("{0} might generate indexables from {1}; deferring until later", indexable.DisplayUri, filter.GetType ().ToString ());

			// This indexable can potentially generate indexables,
			// so defer its indexing
			DeferredInfo di;
			di = new DeferredInfo (indexable, filter, prop_change_docs);
			deferred_indexables.Insert (0, di);

			// Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
			// FIXME: Make sure all indexable.Cleanup is called for all indexables if
			// shutdown is signalled.
			if (FileFilterNotifier != null)
				FileFilterNotifier (null, null, null); // reset

			// Return null to signal the indexable was deferred
			return null;
		}

		// Force the clean-up of temporary files, just in case.
		// FIXME: I am not sure if the cleanup should happen now.
		// What is the difference between filter.Cleanup and Indexable.Cleanup ?
		filter.Cleanup ();

		r.FilterName = filter.GetType ().ToString ();
		r.FilterVersion = filter.Version;
	}

	// If this indexable is not deferred, add it to the index.
	if (indexable.DisplayUri != indexable.ContentUri)
		Log.Debug ("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
	else
		Log.Debug ("+{0}", indexable.DisplayUri);

	// Look up the persistent property document recorded for this URI, if any.
	Document persistent_prop_doc = null;
	if (prop_change_docs != null)
		persistent_prop_doc = (Document) prop_change_docs [indexable.Uri];

	AddDocumentToIndex (indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

	if (FileFilterNotifier != null)
		FileFilterNotifier (null, null, null); // reset

	// Clean up any temporary files associated with filtering this indexable.
	indexable.Cleanup ();

	// Remove any existing text cache for this item
	if (disable_textcache && text_cache != null)
		text_cache.Delete (indexable.Uri);

	return r;
}