// Continues indexing the entries queued in deferred_indexables.
//
// The entry at the front of deferred_indexables is handled in one of two ways:
//  - If its LocalState ["HasNextIndexable"] has been set, it is removed from
//    the list, written with AddDocumentToIndex and cleaned up. An
//    IndexerAddedReceipt is queued for it unless the indexable itself is
//    marked with LocalState ["GeneratedIndexable"].
//  - Otherwise its filter is asked for generated indexables, which are added
//    through AddIndexableToIndex, until the filter reports no more, a
//    shutdown is requested, or num_indexed exceeds RequestFlushThreshold.
//
// Returns null if a shutdown has been requested by the time the loop ends;
// otherwise the queued receipts, followed by a single IndexerIndexablesReceipt
// if deferred indexables remain.
//
// The request parameter is not used by this method.
private IndexerReceipt [] FlushGeneratedIndexables_Unlocked (IndexerRequest request)
{
	int num_indexed = 0;
	ArrayList receipt_queue = new ArrayList ();

	if (text_cache != null)
		text_cache.BeginTransaction ();

	IndexWriter primary_writer, secondary_writer;
	primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
	secondary_writer = null;

	// Everything that uses the writers runs inside try/finally so that the
	// writers are closed even when indexing throws.
	try {
		IndexerAddedReceipt r;

		Log.Debug ("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

		// Access using index so that we can add more deferred_indexable at the front
		// deferred_indexables are added at the front and fetched from the front like a stack
		while (deferred_indexables.Count > 0) {
			DeferredInfo di = (DeferredInfo) deferred_indexables [0];

			if (di.Indexable.LocalState ["HasNextIndexable"] != null) {
				// Finally, good to index
				// Should we do a sanity check ? deferred_indexables [0] =?= di
				deferred_indexables.RemoveAt (0);

				Document persistent_prop_doc = null;
				if (di.PersistentPropDocs != null)
					persistent_prop_doc = (Document) di.PersistentPropDocs [di.Indexable.Uri];

				if (di.Indexable.DisplayUri != di.Indexable.ContentUri)
					Log.Debug ("+{0} ({1}) [deferred]", di.Indexable.DisplayUri, di.Indexable.ContentUri);
				else
					Log.Debug ("+{0} [deferred]", di.Indexable.DisplayUri);

				AddDocumentToIndex (di.Indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

				// Add the receipt if the indexable was submitted and not generated
				if (di.Indexable.LocalState ["GeneratedIndexable"] == null) {
					r = new IndexerAddedReceipt (di.Indexable.Id);
					r.FilterName = di.Filter.GetType ().ToString ();
					r.FilterVersion = di.Filter.Version;
					receipt_queue.Add (r);
				}

				// Cleanup, and text cache maintenance.
				di.Cleanup ();

				if (disable_textcache && text_cache != null)
					text_cache.Delete (di.Indexable.Uri);

				num_indexed ++;
				continue;
			}

			Log.Debug ("Processing deferred indexable from {0}", di.Indexable.DisplayUri);

			bool next = false;
			while (! next && ! Shutdown.ShutdownRequested && num_indexed <= RequestFlushThreshold) {
				Indexable generated_indexable = null;
				bool next_indexable = false;

				try {
					next_indexable = di.Filter.GenerateNextIndexable (out generated_indexable);
				} catch (Exception e) {
					// next_indexable stays false, so a failing filter is handled
					// below exactly like a filter that has nothing more to generate.
					Log.Error (e, "Error while generating next indexable from {0}", di.Indexable.DisplayUri);
				}

				if (! next_indexable) {
					// Mark it for indexing and leave it in the stack
					di.Indexable.LocalState ["HasNextIndexable"] = false;
					next = true;
					break;
				}

				if (generated_indexable == null)
					continue;

				Log.Debug ("Adding generated indexable {0}", generated_indexable.DisplayUri);

				// Mark this indexable
				generated_indexable.LocalState ["GeneratedIndexable"] = true;

				// IndexerGenerated indexables have a common parenturi, which has been used before
				// to remove all docs from the lucene index with that parenturi. So, now we can safely
				// go ahead and just add the new information.
				r = AddIndexableToIndex (generated_indexable, primary_writer, ref secondary_writer, di.PersistentPropDocs);

				// But do not add r to the receipt queue, since this was generated
				if (r != null) // null receipt is returned if generated_indexable is deferred
					num_indexed ++;
			}

			if (Shutdown.ShutdownRequested || num_indexed > RequestFlushThreshold)
				break;
		}

		if (text_cache != null)
			text_cache.CommitTransaction ();

		if (Shutdown.ShutdownRequested) {
			foreach (DeferredInfo di in deferred_indexables)
				di.Cleanup ();
			deferred_indexables.Clear ();

			// The writers are closed by the finally block below.
			return null;
		}
	} finally {
		// Close the secondary writer even if closing the primary one throws.
		try {
			primary_writer.Close ();
		} finally {
			if (secondary_writer != null)
				secondary_writer.Close ();
		}
	}

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug ("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
		IndexerIndexablesReceipt paused_receipt = new IndexerIndexablesReceipt ();
		receipt_queue.Add (paused_receipt);
	}

	return (IndexerReceipt []) receipt_queue.ToArray (typeof (IndexerReceipt));
}
// Continues indexing the entries queued in deferred_indexables.
//
// The entry at the front of deferred_indexables is handled in one of two ways:
//  - If its LocalState ["HasNextIndexable"] has been set, it is removed from
//    the list, written with AddDocumentToIndex and cleaned up. An
//    IndexerAddedReceipt is queued for it unless the indexable itself is
//    marked with LocalState ["GeneratedIndexable"].
//  - Otherwise its filter is asked for generated indexables, which are added
//    through AddIndexableToIndex, until the filter reports no more, a
//    shutdown is requested, or num_indexed exceeds RequestFlushThreshold.
//
// Returns null if a shutdown has been requested by the time the loop ends;
// otherwise the queued receipts, followed by a single IndexerIndexablesReceipt
// if deferred indexables remain.
//
// The request parameter is not used by this method.
private IndexerReceipt [] FlushGeneratedIndexables_Unlocked(IndexerRequest request)
{
    int num_indexed = 0;
    ArrayList receipt_queue = new ArrayList();

    if (text_cache != null)
    {
        text_cache.BeginTransaction();
    }

    IndexWriter primary_writer, secondary_writer;
    primary_writer = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
    secondary_writer = null;

    // Everything that uses the writers runs inside try/finally so that the
    // writers are closed even when indexing throws.
    try
    {
        IndexerAddedReceipt r;

        Log.Debug("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

        // Access using index so that we can add more deferred_indexable at the front
        // deferred_indexables are added at the front and fetched from the front like a stack
        while (deferred_indexables.Count > 0)
        {
            DeferredInfo di = (DeferredInfo)deferred_indexables [0];

            if (di.Indexable.LocalState ["HasNextIndexable"] != null)
            {
                // Finally, good to index
                // Should we do a sanity check ? deferred_indexables [0] =?= di
                deferred_indexables.RemoveAt(0);

                Document persistent_prop_doc = null;
                if (di.PersistentPropDocs != null)
                {
                    persistent_prop_doc = (Document)di.PersistentPropDocs [di.Indexable.Uri];
                }

                if (di.Indexable.DisplayUri != di.Indexable.ContentUri)
                {
                    Log.Debug("+{0} ({1}) [deferred]", di.Indexable.DisplayUri, di.Indexable.ContentUri);
                }
                else
                {
                    Log.Debug("+{0} [deferred]", di.Indexable.DisplayUri);
                }

                AddDocumentToIndex(di.Indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

                // Add the receipt if the indexable was submitted and not generated
                if (di.Indexable.LocalState ["GeneratedIndexable"] == null)
                {
                    r = new IndexerAddedReceipt(di.Indexable.Id);
                    r.FilterName = di.Filter.GetType().ToString();
                    r.FilterVersion = di.Filter.Version;
                    receipt_queue.Add(r);
                }

                // Cleanup, and text cache maintenance.
                di.Cleanup();

                if (disable_textcache && text_cache != null)
                {
                    text_cache.Delete(di.Indexable.Uri);
                }

                num_indexed++;
                continue;
            }

            Log.Debug("Processing deferred indexable from {0}", di.Indexable.DisplayUri);

            bool next = false;
            while (!next && !Shutdown.ShutdownRequested && num_indexed <= RequestFlushThreshold)
            {
                Indexable generated_indexable = null;
                bool next_indexable = false;

                try
                {
                    next_indexable = di.Filter.GenerateNextIndexable(out generated_indexable);
                }
                catch (Exception e)
                {
                    // next_indexable stays false, so a failing filter is handled
                    // below exactly like a filter that has nothing more to generate.
                    Log.Error(e, "Error while generating next indexable from {0}", di.Indexable.DisplayUri);
                }

                if (!next_indexable)
                {
                    // Mark it for indexing and leave it in the stack
                    di.Indexable.LocalState ["HasNextIndexable"] = false;
                    next = true;
                    break;
                }

                if (generated_indexable == null)
                {
                    continue;
                }

                Log.Debug("Adding generated indexable {0}", generated_indexable.DisplayUri);

                // Mark this indexable
                generated_indexable.LocalState ["GeneratedIndexable"] = true;

                // IndexerGenerated indexables have a common parenturi, which has been used before
                // to remove all docs from the lucene index with that parenturi. So, now we can safely
                // go ahead and just add the new information.
                r = AddIndexableToIndex(generated_indexable, primary_writer, ref secondary_writer, di.PersistentPropDocs);

                // But do not add r to the receipt queue, since this was generated
                if (r != null) // null receipt is returned if generated_indexable is deferred
                {
                    num_indexed++;
                }
            }

            if (Shutdown.ShutdownRequested || num_indexed > RequestFlushThreshold)
            {
                break;
            }
        }

        if (text_cache != null)
        {
            text_cache.CommitTransaction();
        }

        if (Shutdown.ShutdownRequested)
        {
            foreach (DeferredInfo di in deferred_indexables)
            {
                di.Cleanup();
            }
            deferred_indexables.Clear();

            // The writers are closed by the finally block below.
            return null;
        }
    }
    finally
    {
        // Close the secondary writer even if closing the primary one throws.
        try
        {
            primary_writer.Close();
        }
        finally
        {
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }
        }
    }

    // Send a single IndexerIndexablesReceipt if there were deferred indexables
    if (deferred_indexables.Count > 0)
    {
        Log.Debug("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
        IndexerIndexablesReceipt paused_receipt = new IndexerIndexablesReceipt();
        receipt_queue.Add(paused_receipt);
    }

    return (IndexerReceipt [])receipt_queue.ToArray(typeof(IndexerReceipt));
}
// Applies an IndexerRequest to the primary and secondary indexes.
//
// First pass (index readers): for every Add or PropertyChange indexable, the
// existing secondary-index documents of the indexable ("Uri" term) and of its
// children ("ParentUri" term) are saved in parent_child_old_props. The old
// documents are then deleted, and an IndexerRemovedReceipt is queued for each
// Remove indexable.
//
// Second pass (index writers): Remove indexables are deleted from the text
// cache (when there is one), PropertyChange indexables are rewritten into the
// secondary index, and Add indexables go through AddIndexableToIndex.
//
// Returns null if a shutdown has been requested by the end of the second
// pass; otherwise the queued receipts, followed by a single
// IndexerIndexablesReceipt if deferred_indexables is not empty.
private IndexerReceipt [] Flush_Unlocked (IndexerRequest request)
{
	ArrayList receipt_queue = new ArrayList ();

	IndexReader primary_reader, secondary_reader;
	primary_reader = IndexReader.Open (PrimaryStore);
	secondary_reader = IndexReader.Open (SecondaryStore);

	// Step #1: Make our first pass over the list of
	// indexables that make up our request.  For each add
	// or property change in the request, get the Lucene
	// documents so we can move forward any persistent
	// properties (for adds) or all old properties (for
	// property changes).
	//
	// Then, for each add or remove in the request,
	// delete the associated documents from the index.
	// Note that we previously cached added documents so
	// that we can move persistent properties forward.

	// parent_child_old_props is double-nested hashtable (depth-2 tree)
	// indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
	// FIXME: 2-level hashtable is a waste for any non-child document.
	// Replace this by a better data structure.
	Hashtable parent_child_old_props = UriFu.NewHashtable ();
	TermDocs term_docs = secondary_reader.TermDocs ();
	int delete_count = 0;

	IEnumerable request_indexables = request.Indexables;

	foreach (Indexable indexable in request_indexables) {

		string uri_str = UriFu.UriToEscapedString (indexable.Uri);
		Term term;

		// Store the necessary properties from old documents for re-addition
		if (indexable.Type == IndexableType.Add ||
		    indexable.Type == IndexableType.PropertyChange) {

			term = new Term ("Uri", uri_str);
			term_docs.Seek (term);

			Hashtable this_parent_child_props = null;

			if (term_docs.Next ()) {
				this_parent_child_props = UriFu.NewHashtable ();
				this_parent_child_props [indexable.Uri] = secondary_reader.Document (term_docs.Doc ());
				parent_child_old_props [indexable.Uri] = this_parent_child_props;
			}

			term = new Term ("ParentUri", uri_str);
			term_docs.Seek (term);

			while (term_docs.Next ()) {
				Document doc = secondary_reader.Document (term_docs.Doc ());

				string child_uri_str = doc.Get ("Uri");
				Uri child_uri = UriFu.EscapedStringToUri (child_uri_str);

				// The parent may have no document of its own in the secondary
				// index, in which case the table was not created above. Create
				// and register it here instead of dereferencing null.
				if (this_parent_child_props == null) {
					this_parent_child_props = UriFu.NewHashtable ();
					parent_child_old_props [indexable.Uri] = this_parent_child_props;
				}

				// Any valid lucene document *should* have a Uri, so no need to check for null
				// Store the child documents too, to save persistent-properties
				// of child documents
				this_parent_child_props [child_uri] = doc;
			}
		}

		// Now remove (non-remove indexables will be re-added in next block)
		Logger.Log.Debug ("-{0}", indexable.DisplayUri);

		int num_delete = 0;

		term = new Term ("Uri", uri_str);
		// For property changes, only secondary index is modified
		secondary_reader.DeleteDocuments (term);

		// Now remove from everywhere else (if asked to remove or if asked to add, in which case
		// we first remove and then add)
		// So we also need to remove child documents
		if (indexable.Type != IndexableType.PropertyChange) {
			num_delete = primary_reader.DeleteDocuments (term);

			// When we delete an indexable, also delete any children.
			// FIXME: Shouldn't we also delete any children of children, etc.?
			term = new Term ("ParentUri", uri_str);
			num_delete += primary_reader.DeleteDocuments (term);
			secondary_reader.DeleteDocuments (term);
		}

		// If this is a strict removal (and not a deletion that
		// we are doing in anticipation of adding something back),
		// queue up a removed receipt.
		if (indexable.Type == IndexableType.Remove) {
			IndexerRemovedReceipt r;
			r = new IndexerRemovedReceipt (indexable.Id);
			r.NumRemoved = num_delete;
			receipt_queue.Add (r);
		}

		delete_count += num_delete;
	}

	term_docs.Close ();

	if (HaveItemCount)
		AdjustItemCount (-delete_count);
	else
		SetItemCount (primary_reader);

	// We are now done with the readers, so we close them.
	// And also free them. Somehow not freeing them is preventing them from
	// GCed at all.
	primary_reader.Close ();
	primary_reader = null;
	secondary_reader.Close ();
	secondary_reader = null;

	// FIXME: If we crash at exactly this point, we are in
	// trouble.  Items will have been dropped from the index
	// without the proper replacements being added.  We can
	// hopefully fix this when we move to Lucene 2.1.

	// Step #2: Make another pass across our list of indexables
	// and write out any new documents.

	if (text_cache != null)
		text_cache.BeginTransaction ();

	IndexWriter primary_writer, secondary_writer;
	// FIXME: Lock obtain time-out can happen here; if that happens,
	// an exception will be thrown and this method will break in the middle
	// (with the text cache transaction begun above never committed).
	primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
	secondary_writer = null;

	// From here on the writers are closed by the finally block, so an
	// exception from any index-modifying call no longer leaves them open.
	try {
		foreach (Indexable indexable in request_indexables) {
			// If shutdown has been started, break here
			// FIXME: Some more processing will continue, a lot of them
			// concerning receipts, but the daemon will anyway ignore receipts
			// now, what is the fastest way to stop from here ?
			if (Shutdown.ShutdownRequested) {
				Log.Debug ("Shutdown initiated. Breaking while flushing indexables.");
				break;
			}

			// Receipts for removes were generated in the
			// previous block.  Now we just have to remove
			// items from the text cache.
			if (indexable.Type == IndexableType.Remove) {
				if (text_cache != null)
					text_cache.Delete (indexable.Uri);

				continue;
			}

			IndexerAddedReceipt r;
			Hashtable prop_change_docs = (Hashtable) parent_child_old_props [indexable.Uri];

			if (indexable.Type == IndexableType.PropertyChange) {

				Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);

				r = new IndexerAddedReceipt (indexable.Id);
				r.PropertyChangesOnly = true;
				receipt_queue.Add (r);

				Document doc;
				if (prop_change_docs == null)
					doc = null;
				else
					doc = (Document) prop_change_docs [indexable.Uri];

				Document new_doc;
				new_doc = RewriteDocument (doc, indexable);

				// Write out the new document...
				if (secondary_writer == null)
					secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
				secondary_writer.AddDocument (new_doc);

				// Get child property change indexables...
				ArrayList prop_change_indexables;
				prop_change_indexables = GetChildPropertyChange (prop_change_docs, indexable);
				// and store them; no need to delete them first, since they were already removed from the index
				if (prop_change_indexables == null)
					continue;

				foreach (Indexable prop_change_indexable in prop_change_indexables) {
					Log.Debug ("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
					doc = (Document) prop_change_docs [prop_change_indexable.Uri];
					new_doc = RewriteDocument (doc, prop_change_indexable);
					secondary_writer.AddDocument (new_doc);
				}

				continue; // ...and proceed to the next Indexable
			}

			// If we reach this point we know we are dealing with an IndexableType.Add

			if (indexable.Type != IndexableType.Add)
				throw new Exception ("When I said it was an IndexableType.Add, I meant it!");

			r = AddIndexableToIndex (indexable, primary_writer, ref secondary_writer, prop_change_docs);
			if (r != null)
				receipt_queue.Add (r);
		}

		if (text_cache != null)
			text_cache.CommitTransaction ();

		if (Shutdown.ShutdownRequested) {
			foreach (DeferredInfo di in deferred_indexables)
				di.Cleanup ();
			deferred_indexables.Clear ();

			foreach (Indexable indexable in request_indexables)
				indexable.Cleanup ();

			// The writers are closed by the finally block below.
			return null;
		}

		if (request.OptimizeIndex) {
			Stopwatch watch = new Stopwatch ();
			Logger.Log.Debug ("Optimizing {0}", IndexName);
			watch.Start ();
			primary_writer.Optimize ();
			if (secondary_writer == null)
				secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
			secondary_writer.Optimize ();
			watch.Stop ();
			Logger.Log.Debug ("{0} optimized in {1}", IndexName, watch);
		}
	} finally {
		// Step #3. Close our writers; the secondary writer is closed even
		// if closing the primary one throws.
		try {
			primary_writer.Close ();
		} finally {
			if (secondary_writer != null)
				secondary_writer.Close ();
		}
	}

	// Finally, return the events to indicate what has happened.

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug ("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
		IndexerIndexablesReceipt r = new IndexerIndexablesReceipt ();
		receipt_queue.Add (r);
	}

	return (IndexerReceipt []) receipt_queue.ToArray (typeof (IndexerReceipt));
}
// Applies an IndexerRequest to the primary and secondary indexes.
//
// First pass (index readers): for every Add or PropertyChange indexable, the
// existing secondary-index documents of the indexable ("Uri" term) and of its
// children ("ParentUri" term) are saved in parent_child_old_props. The old
// documents are then deleted, and an IndexerRemovedReceipt is queued for each
// Remove indexable.
//
// Second pass (index writers): Remove indexables are deleted from the text
// cache (when there is one), PropertyChange indexables are rewritten into the
// secondary index, and Add indexables go through AddIndexableToIndex.
//
// Returns null if a shutdown has been requested by the end of the second
// pass; otherwise the queued receipts, followed by a single
// IndexerIndexablesReceipt if deferred_indexables is not empty.
private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
{
    ArrayList receipt_queue = new ArrayList();

    IndexReader primary_reader, secondary_reader;
    primary_reader = IndexReader.Open(PrimaryStore);
    secondary_reader = IndexReader.Open(SecondaryStore);

    // Step #1: Make our first pass over the list of
    // indexables that make up our request.  For each add
    // or property change in the request, get the Lucene
    // documents so we can move forward any persistent
    // properties (for adds) or all old properties (for
    // property changes).
    //
    // Then, for each add or remove in the request,
    // delete the associated documents from the index.
    // Note that we previously cached added documents so
    // that we can move persistent properties forward.

    // parent_child_old_props is double-nested hashtable (depth-2 tree)
    // indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
    // FIXME: 2-level hashtable is a waste for any non-child document.
    // Replace this by a better data structure.
    Hashtable parent_child_old_props = UriFu.NewHashtable();
    TermDocs term_docs = secondary_reader.TermDocs();
    int delete_count = 0;

    IEnumerable request_indexables = request.Indexables;

    foreach (Indexable indexable in request_indexables)
    {
        string uri_str = UriFu.UriToEscapedString(indexable.Uri);
        Term term;

        // Store the necessary properties from old documents for re-addition
        if (indexable.Type == IndexableType.Add ||
            indexable.Type == IndexableType.PropertyChange)
        {
            term = new Term("Uri", uri_str);
            term_docs.Seek(term);

            Hashtable this_parent_child_props = null;

            if (term_docs.Next())
            {
                this_parent_child_props = UriFu.NewHashtable();
                this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                parent_child_old_props [indexable.Uri] = this_parent_child_props;
            }

            term = new Term("ParentUri", uri_str);
            term_docs.Seek(term);

            while (term_docs.Next())
            {
                Document doc = secondary_reader.Document(term_docs.Doc());

                string child_uri_str = doc.Get("Uri");
                Uri child_uri = UriFu.EscapedStringToUri(child_uri_str);

                // The parent may have no document of its own in the secondary
                // index, in which case the table was not created above. Create
                // and register it here instead of dereferencing null.
                if (this_parent_child_props == null)
                {
                    this_parent_child_props = UriFu.NewHashtable();
                    parent_child_old_props [indexable.Uri] = this_parent_child_props;
                }

                // Any valid lucene document *should* have a Uri, so no need to check for null
                // Store the child documents too, to save persistent-properties
                // of child documents
                this_parent_child_props [child_uri] = doc;
            }
        }

        // Now remove (non-remove indexables will be re-added in next block)
        Logger.Log.Debug("-{0}", indexable.DisplayUri);

        int num_delete = 0;

        term = new Term("Uri", uri_str);
        // For property changes, only secondary index is modified
        secondary_reader.DeleteDocuments(term);

        // Now remove from everywhere else (if asked to remove or if asked to add, in which case
        // we first remove and then add)
        // So we also need to remove child documents
        if (indexable.Type != IndexableType.PropertyChange)
        {
            num_delete = primary_reader.DeleteDocuments(term);

            // When we delete an indexable, also delete any children.
            // FIXME: Shouldn't we also delete any children of children, etc.?
            term = new Term("ParentUri", uri_str);
            num_delete += primary_reader.DeleteDocuments(term);
            secondary_reader.DeleteDocuments(term);
        }

        // If this is a strict removal (and not a deletion that
        // we are doing in anticipation of adding something back),
        // queue up a removed receipt.
        if (indexable.Type == IndexableType.Remove)
        {
            IndexerRemovedReceipt r;
            r = new IndexerRemovedReceipt(indexable.Id);
            r.NumRemoved = num_delete;
            receipt_queue.Add(r);
        }

        delete_count += num_delete;
    }

    term_docs.Close();

    if (HaveItemCount)
    {
        AdjustItemCount(-delete_count);
    }
    else
    {
        SetItemCount(primary_reader);
    }

    // We are now done with the readers, so we close them.
    // And also free them. Somehow not freeing them is preventing them from
    // GCed at all.
    primary_reader.Close();
    primary_reader = null;
    secondary_reader.Close();
    secondary_reader = null;

    // FIXME: If we crash at exactly this point, we are in
    // trouble.  Items will have been dropped from the index
    // without the proper replacements being added.  We can
    // hopefully fix this when we move to Lucene 2.1.

    // Step #2: Make another pass across our list of indexables
    // and write out any new documents.

    if (text_cache != null)
    {
        text_cache.BeginTransaction();
    }

    IndexWriter primary_writer, secondary_writer;
    // FIXME: Lock obtain time-out can happen here; if that happens,
    // an exception will be thrown and this method will break in the middle
    // (with the text cache transaction begun above never committed).
    primary_writer = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
    secondary_writer = null;

    // From here on the writers are closed by the finally block, so an
    // exception from any index-modifying call no longer leaves them open.
    try
    {
        foreach (Indexable indexable in request_indexables)
        {
            // If shutdown has been started, break here
            // FIXME: Some more processing will continue, a lot of them
            // concerning receipts, but the daemon will anyway ignore receipts
            // now, what is the fastest way to stop from here ?
            if (Shutdown.ShutdownRequested)
            {
                Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
                break;
            }

            // Receipts for removes were generated in the
            // previous block.  Now we just have to remove
            // items from the text cache.
            if (indexable.Type == IndexableType.Remove)
            {
                if (text_cache != null)
                {
                    text_cache.Delete(indexable.Uri);
                }

                continue;
            }

            IndexerAddedReceipt r;
            Hashtable prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

            if (indexable.Type == IndexableType.PropertyChange)
            {
                Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

                r = new IndexerAddedReceipt(indexable.Id);
                r.PropertyChangesOnly = true;
                receipt_queue.Add(r);

                Document doc;
                if (prop_change_docs == null)
                {
                    doc = null;
                }
                else
                {
                    doc = (Document)prop_change_docs [indexable.Uri];
                }

                Document new_doc;
                new_doc = RewriteDocument(doc, indexable);

                // Write out the new document...
                if (secondary_writer == null)
                {
                    secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                }
                secondary_writer.AddDocument(new_doc);

                // Get child property change indexables...
                ArrayList prop_change_indexables;
                prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
                // and store them; no need to delete them first, since they were already removed from the index
                if (prop_change_indexables == null)
                {
                    continue;
                }

                foreach (Indexable prop_change_indexable in prop_change_indexables)
                {
                    Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                    doc = (Document)prop_change_docs [prop_change_indexable.Uri];
                    new_doc = RewriteDocument(doc, prop_change_indexable);
                    secondary_writer.AddDocument(new_doc);
                }

                continue; // ...and proceed to the next Indexable
            }

            // If we reach this point we know we are dealing with an IndexableType.Add

            if (indexable.Type != IndexableType.Add)
            {
                throw new Exception("When I said it was an IndexableType.Add, I meant it!");
            }

            r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
            if (r != null)
            {
                receipt_queue.Add(r);
            }
        }

        if (text_cache != null)
        {
            text_cache.CommitTransaction();
        }

        if (Shutdown.ShutdownRequested)
        {
            foreach (DeferredInfo di in deferred_indexables)
            {
                di.Cleanup();
            }
            deferred_indexables.Clear();

            foreach (Indexable indexable in request_indexables)
            {
                indexable.Cleanup();
            }

            // The writers are closed by the finally block below.
            return null;
        }

        if (request.OptimizeIndex)
        {
            Stopwatch watch = new Stopwatch();
            Logger.Log.Debug("Optimizing {0}", IndexName);
            watch.Start();
            primary_writer.Optimize();
            if (secondary_writer == null)
            {
                secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
            }
            secondary_writer.Optimize();
            watch.Stop();
            Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
        }
    }
    finally
    {
        // Step #3. Close our writers; the secondary writer is closed even
        // if closing the primary one throws.
        try
        {
            primary_writer.Close();
        }
        finally
        {
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }
        }
    }

    // Finally, return the events to indicate what has happened.

    // Send a single IndexerIndexablesReceipt if there were deferred indexables
    if (deferred_indexables.Count > 0)
    {
        Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
        IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
        receipt_queue.Add(r);
    }

    return (IndexerReceipt [])receipt_queue.ToArray(typeof(IndexerReceipt));
}