// Post-add hook: after an indexable has been added, update the attributes
// stored for the file it came from. Always returns indexable.Uri.
override protected Uri PostAddHook(Indexable indexable, IndexerAddedReceipt receipt)
{
	// Look up what we cached about the file behind this indexable.
	CachedFileInfo cached = file_info_cache[indexable.Uri] as CachedFileInfo;

	if (cached != null) {
		file_info_cache.Remove(cached.Uri);

		// A file shared across multiple indexables is left untouched for
		// as long as DecrementReferenceCount returns true for its path
		// (presumably until the last of its indexables has been indexed).
		bool skip_tagging = cached.Shared && DecrementReferenceCount(cached.Path);

		if (!skip_tagging) {
			// The file has been indexed, so bring its stored
			// attributes up to date.
			FileAttributes file_attr = FileAttributesStore.ReadOrCreate(cached.Path);
			file_attr.LastWriteTime = cached.Mtime;

			// Filter information is only recorded when the file maps
			// to a single indexable.
			if (!cached.Shared) {
				file_attr.FilterName = receipt.FilterName;
				file_attr.FilterVersion = receipt.FilterVersion;
			}

			bool written = FileAttributesStore.Write(file_attr);
			if (!written) {
				Logger.Log.Warn("Couldn't write attributes for {0}", cached.Path);
			}
		}
	}

	return(indexable.Uri);
}
// When Uris are being remapped, indexables are added to the index with the
// internal Uri attached, so the receipt comes back with an internal Uri.
// For change notification to work correctly, that Uri has to be mapped to
// an external one. This hook returns the remapped Uri; subclasses override
// it to do their own post-add work.
virtual protected Uri PostAddHook(Indexable indexable, IndexerAddedReceipt receipt)
{
	// Default behaviour: the remapped Uri is simply the indexable's Uri.
	Uri remapped_uri = indexable.Uri;
	return(remapped_uri);
}
// Filters a single indexable and adds it to the index.
//
// indexable         - the item to filter and index.
// primary_writer    - writer passed through to AddDocumentToIndex.
// secondary_writer  - writer passed by reference to AddDocumentToIndex.
// prop_change_docs  - optional table of old documents keyed by Uri; the
//                     entry for indexable.Uri (if any) is handed to
//                     AddDocumentToIndex as the persistent-property document.
//
// Returns the receipt for the added indexable, or null when the indexable
// was deferred because its filter reports HasGeneratedIndexable.
private IndexerAddedReceipt AddIndexableToIndex(Indexable indexable, IndexWriter primary_writer, ref IndexWriter secondary_writer, Hashtable prop_change_docs)
{
	Filter filter = null;

	if (FileFilterNotifier != null) {
		FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.
	}

	// If we have content, try to find a filter
	// we can use to process the indexable.
	bool filter_content = false;
	try {
		filter_content = FilterFactory.FilterIndexable(indexable, (disable_textcache ? null : text_cache), out filter);
	} catch (Exception e) {
		// Filtering stays best-effort: the indexable is still indexed
		// (without content) below, but the failure is no longer silent.
		Log.Error(e, "Error while filtering {0}", indexable.DisplayUri);
	}

	if (!filter_content) {
		indexable.NoContent = true;
		filter = null;
	}

	if (FileFilterNotifier != null) {
		FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter
	}

	IndexerAddedReceipt r = new IndexerAddedReceipt(indexable.Id);

	if (filter != null) {
		if (filter.HasGeneratedIndexable) {
			Log.Debug("{0} might generate indexables from {1}; deferring until later", indexable.DisplayUri, filter.GetType().ToString());

			// This indexable can potentially generate indexables,
			// so defer its indexing. New entries go to the front
			// of deferred_indexables.
			DeferredInfo di;
			di = new DeferredInfo(indexable, filter, prop_change_docs);
			deferred_indexables.Insert(0, di);

			// Since we are deferred, do not cleanup the indexable or
			// remove its text-cache entry yet.
			// FIXME: Make sure all indexable.Cleanup is called for all indexables if
			// shutdown is signalled.
			if (FileFilterNotifier != null) {
				FileFilterNotifier(null, null, null); // reset
			}

			// Return null to signal the indexable was deferred
			return(null);
		}

		// Force the clean-up of temporary files, just in case.
		// FIXME: I am not sure if the cleanup should happen now.
		// What is the difference between filter.Cleanup and Indexable.Cleanup ?
		filter.Cleanup();

		r.FilterName = filter.GetType().ToString();
		r.FilterVersion = filter.Version;
	}

	// This indexable is not deferred, so add it to the index.
	if (indexable.DisplayUri != indexable.ContentUri) {
		Log.Debug("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
	} else {
		Log.Debug("+{0}", indexable.DisplayUri);
	}

	Document persistent_prop_doc = null;
	if (prop_change_docs != null) {
		persistent_prop_doc = (Document)prop_change_docs [indexable.Uri];
	}
	AddDocumentToIndex(indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

	if (FileFilterNotifier != null) {
		FileFilterNotifier(null, null, null); // reset
	}

	// Clean up any temporary files associated with filtering this indexable.
	indexable.Cleanup();

	// Remove any existing text cache for this item
	if (disable_textcache && text_cache != null) {
		text_cache.Delete(indexable.Uri);
	}

	return(r);
}
// Continues indexing of deferred indexables (those whose filters can
// generate further indexables). Works on deferred_indexables as a stack:
// entries are fetched from the front, and newly deferred ones are inserted
// at the front by AddIndexableToIndex.
//
// Returns the receipts for the submitted (non-generated) indexables that
// were added, plus one IndexerIndexablesReceipt if deferred indexables
// remain; returns null if a shutdown was requested.
private IndexerReceipt [] FlushGeneratedIndexables_Unlocked(IndexerRequest request)
{
	ArrayList receipts = new ArrayList();
	int indexed_count = 0;

	if (text_cache != null) {
		text_cache.BeginTransaction();
	}

	IndexWriter primary_writer = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
	IndexWriter secondary_writer = null;

	Log.Debug("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

	while (deferred_indexables.Count > 0) {
		DeferredInfo top = (DeferredInfo)deferred_indexables [0];

		if (top.Indexable.LocalState ["HasNextIndexable"] != null) {
			// The filter has nothing more to generate, so the
			// deferred indexable itself can finally be indexed.
			deferred_indexables.RemoveAt(0);

			Document old_props = null;
			if (top.PersistentPropDocs != null) {
				old_props = (Document)top.PersistentPropDocs [top.Indexable.Uri];
			}

			if (top.Indexable.DisplayUri != top.Indexable.ContentUri) {
				Log.Debug("+{0} ({1}) [deferred]", top.Indexable.DisplayUri, top.Indexable.ContentUri);
			} else {
				Log.Debug("+{0} [deferred]", top.Indexable.DisplayUri);
			}

			AddDocumentToIndex(top.Indexable, old_props, primary_writer, ref secondary_writer);

			// Only indexables that were submitted (not generated)
			// get a receipt.
			if (top.Indexable.LocalState ["GeneratedIndexable"] == null) {
				IndexerAddedReceipt added = new IndexerAddedReceipt(top.Indexable.Id);
				added.FilterName = top.Filter.GetType().ToString();
				added.FilterVersion = top.Filter.Version;
				receipts.Add(added);
			}

			// Cleanup, and text cache maintenance.
			top.Cleanup();
			if (disable_textcache && text_cache != null) {
				text_cache.Delete(top.Indexable.Uri);
			}

			indexed_count++;
		} else {
			Log.Debug("Processing deferred indexable from {0}", top.Indexable.DisplayUri);

			// Pull generated indexables out of the filter until it is
			// exhausted, a shutdown is requested, or the flush
			// threshold is exceeded.
			while (!Shutdown.ShutdownRequested && indexed_count <= RequestFlushThreshold) {
				Indexable child = null;
				bool has_more = false;

				try {
					has_more = top.Filter.GenerateNextIndexable(out child);
				} catch (Exception e) {
					Log.Error(e, "Error while generating next indexable from {0}", top.Indexable.DisplayUri);
				}

				if (!has_more) {
					// Mark it for indexing and leave it in the stack
					top.Indexable.LocalState ["HasNextIndexable"] = false;
					break;
				}

				if (child != null) {
					Log.Debug("Adding generated indexable {0}", child.DisplayUri);

					// Mark this indexable as generated
					child.LocalState ["GeneratedIndexable"] = true;

					// Generated indexables share a parent uri that was
					// already used to remove the old docs from the
					// index, so the new information can just be added.
					// No receipt is queued for generated indexables; a
					// null result means the child was itself deferred.
					IndexerAddedReceipt child_receipt = AddIndexableToIndex(child, primary_writer, ref secondary_writer, top.PersistentPropDocs);
					if (child_receipt != null) {
						indexed_count++;
					}
				}
			}

			if (Shutdown.ShutdownRequested || indexed_count > RequestFlushThreshold) {
				break;
			}
		}
	}

	if (text_cache != null) {
		text_cache.CommitTransaction();
	}

	bool shutting_down = Shutdown.ShutdownRequested;

	if (shutting_down) {
		foreach (DeferredInfo pending in deferred_indexables) {
			pending.Cleanup();
		}
		deferred_indexables.Clear();
	}

	primary_writer.Close();
	if (secondary_writer != null) {
		secondary_writer.Close();
	}

	if (shutting_down) {
		return(null);
	}

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
		receipts.Add(new IndexerIndexablesReceipt());
	}

	return((IndexerReceipt [])receipts.ToArray(typeof(IndexerReceipt)));
}
// Processes an indexer request in two passes: first the old documents for
// every indexable in the request are deleted from the indexes (after saving
// the old secondary-index documents for adds and property changes), then
// the new documents are written out.
//
// Returns the receipts describing what happened, plus one
// IndexerIndexablesReceipt if any indexables were deferred; returns null if
// a shutdown was requested while flushing.
private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
{
	ArrayList receipt_queue;
	receipt_queue = new ArrayList();

	IndexReader primary_reader, secondary_reader;
	primary_reader = IndexReader.Open(PrimaryStore);
	secondary_reader = IndexReader.Open(SecondaryStore);

	// Step #1: Make our first pass over the list of
	// indexables that make up our request.  For each add
	// or property change in the request, get the Lucene
	// documents so we can move forward any persistent
	// properties (for adds) or all old properties (for
	// property changes).
	//
	// Then, for each add or remove in the request,
	// delete the associated documents from the index.
	// Note that we previously cached added documents so
	// that we can move persistent properties forward.

	// parent_child_old_props is double-nested hashtable (depth-2 tree)
	// indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
	// FIXME: 2-level hashtable is a waste for any non-child document.
	// Replace this by a better data structure.
	Hashtable parent_child_old_props = UriFu.NewHashtable();
	TermDocs term_docs = secondary_reader.TermDocs();
	int delete_count = 0;

	IEnumerable request_indexables = request.Indexables;

	foreach (Indexable indexable in request_indexables) {

		string uri_str = UriFu.UriToEscapedString(indexable.Uri);
		Term term;

		// Store the necessary properties from old documents for re-addition
		if (indexable.Type == IndexableType.Add || indexable.Type == IndexableType.PropertyChange) {

			term = new Term("Uri", uri_str);
			term_docs.Seek(term);

			Hashtable this_parent_child_props = null;

			if (term_docs.Next()) {
				this_parent_child_props = UriFu.NewHashtable();
				this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
				parent_child_old_props [indexable.Uri] = this_parent_child_props;
			}

			term = new Term("ParentUri", uri_str);
			term_docs.Seek(term);

			while (term_docs.Next()) {
				Document doc = secondary_reader.Document(term_docs.Doc());

				string child_uri_str = doc.Get("Uri");
				Uri child_uri = UriFu.EscapedStringToUri(child_uri_str);
				// Any valid lucene document *should* have a Uri, so no need to check for null

				// Child documents can exist even when no document was
				// found for the parent itself; create the per-parent
				// table on demand instead of dereferencing null.
				if (this_parent_child_props == null) {
					this_parent_child_props = UriFu.NewHashtable();
					parent_child_old_props [indexable.Uri] = this_parent_child_props;
				}

				// Store the child documents too, to save persistent-properties
				// of child documents
				this_parent_child_props [child_uri] = doc;
			}
		}

		// Now remove (non-remove indexables will be re-added in next block)
		Logger.Log.Debug("-{0}", indexable.DisplayUri);

		int num_delete = 0;

		term = new Term("Uri", uri_str);
		// For property changes, only secondary index is modified
		secondary_reader.DeleteDocuments(term);

		// Now remove from everywhere else (if asked to remove or if asked to add, in which case
		// we first remove and then add)
		// So we also need to remove child documents
		if (indexable.Type != IndexableType.PropertyChange) {
			num_delete = primary_reader.DeleteDocuments(term);

			// When we delete an indexable, also delete any children.
			// FIXME: Shouldn't we also delete any children of children, etc.?
			term = new Term("ParentUri", uri_str);
			num_delete += primary_reader.DeleteDocuments(term);
			secondary_reader.DeleteDocuments(term);
		}

		// If this is a strict removal (and not a deletion that
		// we are doing in anticipation of adding something back),
		// queue up a removed receipt.
		if (indexable.Type == IndexableType.Remove) {
			IndexerRemovedReceipt r;
			r = new IndexerRemovedReceipt(indexable.Id);
			r.NumRemoved = num_delete;
			receipt_queue.Add(r);
		}

		delete_count += num_delete;
	}

	term_docs.Close();

	if (HaveItemCount) {
		AdjustItemCount(-delete_count);
	} else {
		SetItemCount(primary_reader);
	}

	// We are now done with the readers, so we close them.
	// And also free them. Somehow not freeing them is preventing them from
	// GCed at all.
	primary_reader.Close();
	primary_reader = null;
	secondary_reader.Close();
	secondary_reader = null;

	// FIXME: If we crash at exactly this point, we are in
	// trouble.  Items will have been dropped from the index
	// without the proper replacements being added.  We can
	// hopefully fix this when we move to Lucene 2.1.

	// Step #2: Make another pass across our list of indexables
	// and write out any new documents.

	if (text_cache != null) {
		text_cache.BeginTransaction();
	}

	IndexWriter primary_writer, secondary_writer;
	// FIXME: Lock obtain time-out can happen here; if that happens,
	// an exception will be thrown and this method will break in the middle
	// leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
	// methods.
	primary_writer = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
	secondary_writer = null;

	foreach (Indexable indexable in request_indexables) {
		// If shutdown has been started, break here
		// FIXME: Some more processing will continue, a lot of them
		// concerning receipts, but the daemon will anyway ignore receipts
		// now, what is the fastest way to stop from here ?
		if (Shutdown.ShutdownRequested) {
			Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
			break;
		}

		// Receipts for removes were generated in the
		// previous block.  Now we just have to remove
		// items from the text cache.
		if (indexable.Type == IndexableType.Remove) {
			if (text_cache != null) {
				text_cache.Delete(indexable.Uri);
			}

			continue;
		}

		IndexerAddedReceipt r;
		Hashtable prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

		if (indexable.Type == IndexableType.PropertyChange) {

			Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

			r = new IndexerAddedReceipt(indexable.Id);
			r.PropertyChangesOnly = true;
			receipt_queue.Add(r);

			// doc stays null when no old document was saved for this Uri.
			Document doc;
			if (prop_change_docs == null) {
				doc = null;
			} else {
				doc = (Document)prop_change_docs [indexable.Uri];
			}

			Document new_doc;
			new_doc = RewriteDocument(doc, indexable);

			// Write out the new document...
			if (secondary_writer == null) {
				secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
			}
			secondary_writer.AddDocument(new_doc);

			// Get child property change indexables...
			ArrayList prop_change_indexables;
			prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
			// and store them; no need to delete them first, since they were already removed from the index
			if (prop_change_indexables == null) {
				continue;
			}

			foreach (Indexable prop_change_indexable in prop_change_indexables) {
				Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
				doc = (Document)prop_change_docs [prop_change_indexable.Uri];
				new_doc = RewriteDocument(doc, prop_change_indexable);
				secondary_writer.AddDocument(new_doc);
			}

			continue; // ...and proceed to the next Indexable
		}

		// If we reach this point we know we are dealing with an IndexableType.Add

		if (indexable.Type != IndexableType.Add) {
			throw new Exception("When I said it was an IndexableType.Add, I meant it!");
		}

		// A null receipt means the indexable was deferred.
		r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
		if (r != null) {
			receipt_queue.Add(r);
		}
	}

	if (text_cache != null) {
		text_cache.CommitTransaction();
	}

	if (Shutdown.ShutdownRequested) {
		foreach (DeferredInfo di in deferred_indexables) {
			di.Cleanup();
		}
		deferred_indexables.Clear();

		foreach (Indexable indexable in request_indexables) {
			indexable.Cleanup();
		}

		primary_writer.Close();
		if (secondary_writer != null) {
			secondary_writer.Close();
		}

		return(null);
	}

	if (request.OptimizeIndex) {
		Stopwatch watch = new Stopwatch();
		Logger.Log.Debug("Optimizing {0}", IndexName);
		watch.Start();
		primary_writer.Optimize();
		if (secondary_writer == null) {
			secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
		}
		secondary_writer.Optimize();
		watch.Stop();
		Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
	}

	// Step #3. Close our writers and return the events to
	// indicate what has happened.

	primary_writer.Close();
	if (secondary_writer != null) {
		secondary_writer.Close();
	}

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
		IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
		receipt_queue.Add(r);
	}

	IndexerReceipt [] receipt_array;
	receipt_array = new IndexerReceipt [receipt_queue.Count];
	for (int i = 0; i < receipt_queue.Count; ++i) {
		receipt_array [i] = (IndexerReceipt)receipt_queue [i];
	}

	return(receipt_array);
}
// Called after an indexable has been added: refreshes the attributes stored
// for the file the indexable was created from. Always returns indexable.Uri.
override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// Retrieve our cached info about the file; without it there is
	// nothing to update.
	CachedFileInfo file_info = file_info_cache [indexable.Uri] as CachedFileInfo;
	if (file_info == null)
		return indexable.Uri;

	file_info_cache.Remove (file_info.Uri);

	// For a file shared across multiple indexables, attributes are only
	// written once DecrementReferenceCount returns false for its path
	// (presumably when the last of its indexables has been indexed).
	if (! file_info.Shared || ! DecrementReferenceCount (file_info.Path)) {

		// The file has been successfully indexed, so update the
		// file attributes accordingly.
		FileAttributes stored = FileAttributesStore.ReadOrCreate (file_info.Path);
		stored.LastWriteTime = file_info.Mtime;

		// Don't set filter information on a file if multiple
		// indexables have been created from it.
		if (! file_info.Shared) {
			stored.FilterName = receipt.FilterName;
			stored.FilterVersion = receipt.FilterVersion;
		}

		bool ok = FileAttributesStore.Write (stored);
		if (! ok)
			Logger.Log.Warn ("Couldn't write attributes for {0}", file_info.Path);
	}

	return indexable.Uri;
}
// Called after a note has been added: runs the base hook, then moves the
// note's pending text from note_text_cache into the user text cache.
// Always returns indexable.Uri.
override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	base.PostAddHook (indexable, receipt);

	// The text is written to the text cache here, in the post-add hook,
	// so that the TextCache is not modified until we are sure the note
	// was actually indexed.
	string pending_text = (string) note_text_cache [indexable.Uri];

	// Per the original author's note, a null text is equivalent to
	// deleting the text cache entry for this Uri.
	TextCache.UserCache.WriteFromString (indexable.Uri, pending_text);

	// The pending entry has been handed over; drop it.
	note_text_cache.Remove (indexable.Uri);

	return indexable.Uri;
}
// This is mostly a copy of LuceneQueryable.Flush + FSQ.PostAddHooks/PostRemoveHook
//
// Flushes pending_request through the given indexer and applies the
// resulting receipts to the file attributes store: added receipts update
// the attributes of top-level (non-child) indexables, removed receipts drop
// the attributes for the removed path.
//
// Returns true when the indexer reported (via an IndexerIndexablesReceipt)
// that indexing has to continue, false otherwise. That includes the cases
// where there was nothing to flush or the flush hit a shutdown.
static bool FlushIndexer(IIndexer indexer)
{
	IndexerRequest flushed_request;
	if (pending_request.IsEmpty) {
		return(false);
	}

	flushed_request = pending_request;
	pending_request = new IndexerRequest();

	IndexerReceipt [] receipts;
	receipts = indexer.Flush(flushed_request);

	// Flush will return null if it encounters a shutdown during flushing
	if (receipts == null) {
		return(false);
	}

	fa_store.BeginTransaction();
	bool indexer_indexable_receipt = false;

	foreach (IndexerReceipt raw_r in receipts) {

		if (raw_r is IndexerAddedReceipt) {
			// Update the file attributes
			IndexerAddedReceipt r = (IndexerAddedReceipt)raw_r;

			Indexable indexable = flushed_request.RetrieveRequestIndexable(r);

			if (indexable == null) {
				Logger.Log.Debug("Should not happen! Previously requested indexable with id #{0} has eloped!", r.Id);
				continue;
			}

			// We don't need to write out any file attributes for
			// children.
			if (indexable.ParentUri != null) {
				continue;
			}

			string path = indexable.Uri.LocalPath;

			FileAttributes attr;
			attr = fa_store.ReadOrCreate(path);

			attr.LastWriteTime = indexable.Timestamp;
			attr.FilterName = r.FilterName;
			attr.FilterVersion = r.FilterVersion;

			fa_store.Write(attr);

		} else if (raw_r is IndexerRemovedReceipt) {
			// Drop the file attributes of the removed item
			IndexerRemovedReceipt r = (IndexerRemovedReceipt)raw_r;

			Indexable indexable = flushed_request.RetrieveRequestIndexable(r);
			if (indexable == null) { // Should never happen
				Log.Warn("Unable to match indexable-remove #{0} to any request!", r.Id);
				continue;
			}

			string path = indexable.Uri.LocalPath;
			Logger.Log.Debug("Removing: '{0}'", path);
			fa_store.Drop(path);

		} else if (raw_r is IndexerIndexablesReceipt) {
			indexer_indexable_receipt = true;
		}
	}

	// Commit on every path. Previously the reschedule branch below
	// returned before the commit, leaving the transaction opened by
	// BeginTransaction() above unfinished.
	fa_store.CommitTransaction();

	pending_request.DeferredIndexables = flushed_request.DeferredIndexables;

	// Reschedule if some indexable generated more indexables
	if (indexer_indexable_receipt) {
		pending_request.ContinueIndexing = true;
		return(true);
	}

	return(false);
}
// Resumes indexing of the entries in deferred_indexables. The list is used
// like a stack: the entry at index 0 is processed first, and entries that
// get deferred along the way are inserted at index 0 by AddIndexableToIndex.
//
// Returns receipts for the submitted (non-generated) indexables that were
// indexed, followed by a single IndexerIndexablesReceipt if deferred entries
// are still left; returns null if a shutdown was requested.
private IndexerReceipt [] FlushGeneratedIndexables_Unlocked (IndexerRequest request)
{
	int processed = 0;
	ArrayList queued_receipts = new ArrayList ();

	if (text_cache != null)
		text_cache.BeginTransaction ();

	IndexWriter primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
	IndexWriter secondary_writer = null;

	Log.Debug ("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

	while (deferred_indexables.Count > 0) {
		DeferredInfo info = (DeferredInfo) deferred_indexables [0];
		Indexable parent = info.Indexable;

		if (parent.LocalState ["HasNextIndexable"] != null) {
			// Its filter has been drained already; index the
			// deferred indexable itself and pop it off the stack.
			deferred_indexables.RemoveAt (0);

			Document saved_props = null;
			if (info.PersistentPropDocs != null)
				saved_props = (Document) info.PersistentPropDocs [parent.Uri];

			if (parent.DisplayUri != parent.ContentUri)
				Log.Debug ("+{0} ({1}) [deferred]", parent.DisplayUri, parent.ContentUri);
			else
				Log.Debug ("+{0} [deferred]", parent.DisplayUri);

			AddDocumentToIndex (parent, saved_props, primary_writer, ref secondary_writer);

			// A receipt is only produced for indexables that were
			// submitted, not for generated ones.
			if (parent.LocalState ["GeneratedIndexable"] == null) {
				IndexerAddedReceipt added = new IndexerAddedReceipt (parent.Id);
				added.FilterName = info.Filter.GetType ().ToString ();
				added.FilterVersion = info.Filter.Version;
				queued_receipts.Add (added);
			}

			// Cleanup, and text cache maintenance.
			info.Cleanup ();
			if (disable_textcache && text_cache != null)
				text_cache.Delete (parent.Uri);

			processed ++;
			continue;
		}

		Log.Debug ("Processing deferred indexable from {0}", parent.DisplayUri);

		bool exhausted = false;
		while (! exhausted && ! Shutdown.ShutdownRequested && processed <= RequestFlushThreshold) {
			Indexable child = null;
			bool has_more = false;

			try {
				has_more = info.Filter.GenerateNextIndexable (out child);
			} catch (Exception e) {
				Log.Error (e, "Error while generating next indexable from {0}", parent.DisplayUri);
			}

			if (has_more) {
				if (child != null) {
					Log.Debug ("Adding generated indexable {0}", child.DisplayUri);

					// Mark this indexable as generated
					child.LocalState ["GeneratedIndexable"] = true;

					// Generated indexables share a parent uri, which
					// was used earlier to remove all their old docs
					// from the index, so we can simply add the new
					// information. The receipt is not queued; null
					// means the child was itself deferred.
					IndexerAddedReceipt child_receipt;
					child_receipt = AddIndexableToIndex (child, primary_writer, ref secondary_writer, info.PersistentPropDocs);
					if (child_receipt != null)
						processed ++;
				}
			} else {
				// Mark it for indexing and leave it in the stack
				parent.LocalState ["HasNextIndexable"] = false;
				exhausted = true;
			}
		}

		if (Shutdown.ShutdownRequested || processed > RequestFlushThreshold)
			break;
	}

	if (text_cache != null)
		text_cache.CommitTransaction ();

	bool stop_now = Shutdown.ShutdownRequested;

	if (stop_now) {
		foreach (DeferredInfo leftover in deferred_indexables)
			leftover.Cleanup ();
		deferred_indexables.Clear ();
	}

	primary_writer.Close ();
	if (secondary_writer != null)
		secondary_writer.Close ();

	if (stop_now)
		return null;

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug ("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
		queued_receipts.Add (new IndexerIndexablesReceipt ());
	}

	IndexerReceipt [] result = new IndexerReceipt [queued_receipts.Count];
	queued_receipts.CopyTo (result);
	return result;
}
// Filters a single indexable and adds it to the index.
//
// indexable         - the item to filter and index.
// primary_writer    - writer passed through to AddDocumentToIndex.
// secondary_writer  - writer passed by reference to AddDocumentToIndex.
// prop_change_docs  - optional table of old documents keyed by Uri; the
//                     entry for indexable.Uri (if any) is handed to
//                     AddDocumentToIndex as the persistent-property document.
//
// Returns the receipt for the added indexable, or null when the indexable
// was deferred because its filter reports HasGeneratedIndexable.
private IndexerAddedReceipt AddIndexableToIndex (Indexable indexable, IndexWriter primary_writer, ref IndexWriter secondary_writer, Hashtable prop_change_docs)
{
	Filter filter = null;

	if (FileFilterNotifier != null)
		FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.

	// If we have content, try to find a filter
	// we can use to process the indexable.
	bool filter_content = false;
	try {
		filter_content = FilterFactory.FilterIndexable (indexable, (disable_textcache ? null : text_cache), out filter);
	} catch (Exception e) {
		// Filtering stays best-effort: the indexable is still indexed
		// (without content) below, but the failure is no longer silent.
		Log.Error (e, "Error while filtering {0}", indexable.DisplayUri);
	}

	if (! filter_content) {
		indexable.NoContent = true;
		filter = null;
	}

	if (FileFilterNotifier != null)
		FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter

	IndexerAddedReceipt r = new IndexerAddedReceipt (indexable.Id);

	if (filter != null) {
		if (filter.HasGeneratedIndexable) {
			Log.Debug ("{0} might generate indexables from {1}; deferring until later", indexable.DisplayUri, filter.GetType ().ToString ());

			// This indexable can potentially generate indexables,
			// so defer its indexing. New entries go to the front
			// of deferred_indexables.
			DeferredInfo di;
			di = new DeferredInfo (indexable, filter, prop_change_docs);
			deferred_indexables.Insert (0, di);

			// Since we are deferred, do not cleanup the indexable or
			// remove its text-cache entry yet.
			// FIXME: Make sure all indexable.Cleanup is called for all indexables if
			// shutdown is signalled.
			if (FileFilterNotifier != null)
				FileFilterNotifier (null, null, null); // reset

			// Return null to signal the indexable was deferred
			return null;
		}

		// Force the clean-up of temporary files, just in case.
		// FIXME: I am not sure if the cleanup should happen now.
		// What is the difference between filter.Cleanup and Indexable.Cleanup ?
		filter.Cleanup ();

		r.FilterName = filter.GetType ().ToString ();
		r.FilterVersion = filter.Version;
	}

	// This indexable is not deferred, so add it to the index.
	if (indexable.DisplayUri != indexable.ContentUri)
		Log.Debug ("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
	else
		Log.Debug ("+{0}", indexable.DisplayUri);

	Document persistent_prop_doc = null;
	if (prop_change_docs != null)
		persistent_prop_doc = (Document) prop_change_docs [indexable.Uri];
	AddDocumentToIndex (indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

	if (FileFilterNotifier != null)
		FileFilterNotifier (null, null, null); // reset

	// Clean up any temporary files associated with filtering this indexable.
	indexable.Cleanup ();

	// Remove any existing text cache for this item
	if (disable_textcache && text_cache != null)
		text_cache.Delete (indexable.Uri);

	return r;
}
// Applies one IndexerRequest to the primary and secondary stores.
//
// The work happens in passes over request.Indexables:
//   1. With index readers: remember the old secondary-index documents for
//      adds and property changes, then delete the documents being removed
//      or replaced.
//   2. With index writers: write out property changes and new documents.
//   3. Optionally optimize both indexes.
//   4. Close the writers and assemble the receipts.
//
// Returns the receipts generated for the request, or null if a shutdown
// was requested while flushing.
private IndexerReceipt [] Flush_Unlocked (IndexerRequest request)
{
	ArrayList receipt_queue;
	receipt_queue = new ArrayList ();

	IndexReader primary_reader, secondary_reader;
	primary_reader = IndexReader.Open (PrimaryStore);
	secondary_reader = IndexReader.Open (SecondaryStore);

	// Step #1: Make our first pass over the list of
	// indexables that make up our request.  For each add
	// or property change in the request, get the Lucene
	// documents so we can move forward any persistent
	// properties (for adds) or all old properties (for
	// property changes).
	//
	// Then, for each add or remove in the request,
	// delete the associated documents from the index.
	// Note that we previously cached added documents so
	// that we can move persistent properties forward.

	// parent_child_old_props is double-nested hashtable (depth-2 tree)
	// indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
	// FIXME: 2-level hashtable is a waste for any non-child document.
	// Replace this by a better data structure.
	Hashtable parent_child_old_props = UriFu.NewHashtable ();
	TermDocs term_docs = secondary_reader.TermDocs ();
	int delete_count = 0;

	IEnumerable request_indexables = request.Indexables;

	foreach (Indexable indexable in request_indexables) {

		string uri_str = UriFu.UriToEscapedString (indexable.Uri);
		Term term;

		// Store the necessary properties from old documents for re-addition
		if (indexable.Type == IndexableType.Add ||
		    indexable.Type == IndexableType.PropertyChange) {

			term = new Term ("Uri", uri_str);
			term_docs.Seek (term);

			Hashtable this_parent_child_props = null;

			if (term_docs.Next ()) {
				this_parent_child_props = UriFu.NewHashtable ();
				this_parent_child_props [indexable.Uri] = secondary_reader.Document (term_docs.Doc ());
				parent_child_old_props [indexable.Uri] = this_parent_child_props;
			}

			term = new Term ("ParentUri", uri_str);
			term_docs.Seek (term);

			while (term_docs.Next ()) {
				Document doc = secondary_reader.Document (term_docs.Doc ());

				string child_uri_str = doc.Get ("Uri");
				Uri child_uri = UriFu.EscapedStringToUri (child_uri_str);

				// The parent itself may have no document in the
				// secondary index even though its children do; in
				// that case the table has not been created yet, so
				// create and register it here instead of
				// dereferencing null.
				if (this_parent_child_props == null) {
					this_parent_child_props = UriFu.NewHashtable ();
					parent_child_old_props [indexable.Uri] = this_parent_child_props;
				}

				// Any valid lucene document *should* have a Uri, so no need to check for null
				// Store the child documents too, to save persistent-properties
				// of child documents
				this_parent_child_props [child_uri] = doc;
			}
		}

		// Now remove (non-remove indexables will be re-added in next block)
		Logger.Log.Debug ("-{0}", indexable.DisplayUri);

		int num_delete = 0;

		term = new Term ("Uri", uri_str);
		// For property changes, only secondary index is modified
		secondary_reader.DeleteDocuments (term);

		// Now remove from everywhere else (if asked to remove or if asked to add, in which case
		// we first remove and then add)
		// So we also need to remove child documents
		if (indexable.Type != IndexableType.PropertyChange) {
			num_delete = primary_reader.DeleteDocuments (term);

			// When we delete an indexable, also delete any children.
			// FIXME: Shouldn't we also delete any children of children, etc.?
			term = new Term ("ParentUri", uri_str);
			num_delete += primary_reader.DeleteDocuments (term);
			secondary_reader.DeleteDocuments (term);
		}

		// If this is a strict removal (and not a deletion that
		// we are doing in anticipation of adding something back),
		// queue up a removed receipt.
		if (indexable.Type == IndexableType.Remove) {
			IndexerRemovedReceipt r;
			r = new IndexerRemovedReceipt (indexable.Id);
			r.NumRemoved = num_delete;
			receipt_queue.Add (r);
		}

		delete_count += num_delete;
	}

	term_docs.Close ();

	if (HaveItemCount)
		AdjustItemCount (-delete_count);
	else
		SetItemCount (primary_reader);

	// We are now done with the readers, so we close them.
	// And also free them. Somehow not freeing them is preventing them from
	// GCed at all.
	primary_reader.Close ();
	primary_reader = null;
	secondary_reader.Close ();
	secondary_reader = null;

	// FIXME: If we crash at exactly this point, we are in
	// trouble.  Items will have been dropped from the index
	// without the proper replacements being added.  We can
	// hopefully fix this when we move to Lucene 2.1.

	// Step #2: Make another pass across our list of indexables
	// and write out any new documents.

	if (text_cache != null)
		text_cache.BeginTransaction ();

	IndexWriter primary_writer, secondary_writer;
	// FIXME: Lock obtain time-out can happen here; if that happens,
	// an exception will be thrown and this method will break in the middle
	// leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
	// methods.
	primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
	secondary_writer = null;

	foreach (Indexable indexable in request_indexables) {
		// If shutdown has been started, break here
		// FIXME: Some more processing will continue, a lot of them
		// concerning receipts, but the daemon will anyway ignore receipts
		// now, what is the fastest way to stop from here ?
		if (Shutdown.ShutdownRequested) {
			Log.Debug ("Shutdown initiated. Breaking while flushing indexables.");
			break;
		}

		// Receipts for removes were generated in the
		// previous block.  Now we just have to remove
		// items from the text cache.
		if (indexable.Type == IndexableType.Remove) {
			if (text_cache != null)
				text_cache.Delete (indexable.Uri);

			continue;
		}

		IndexerAddedReceipt r;
		Hashtable prop_change_docs = (Hashtable) parent_child_old_props [indexable.Uri];

		if (indexable.Type == IndexableType.PropertyChange) {

			Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);

			r = new IndexerAddedReceipt (indexable.Id);
			r.PropertyChangesOnly = true;
			receipt_queue.Add (r);

			Document doc;
			if (prop_change_docs == null)
				doc = null;
			else
				doc = (Document) prop_change_docs [indexable.Uri];

			Document new_doc;
			new_doc = RewriteDocument (doc, indexable);

			// Write out the new document...
			if (secondary_writer == null)
				secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
			secondary_writer.AddDocument (new_doc);

			// Get child property change indexables...
			ArrayList prop_change_indexables;
			prop_change_indexables = GetChildPropertyChange (prop_change_docs, indexable);
			// and store them; no need to delete them first, since they were already removed from the index
			if (prop_change_indexables == null)
				continue;

			foreach (Indexable prop_change_indexable in prop_change_indexables) {
				Log.Debug ("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
				doc = (Document) prop_change_docs [prop_change_indexable.Uri];
				new_doc = RewriteDocument (doc, prop_change_indexable);
				secondary_writer.AddDocument (new_doc);
			}

			continue; // ...and proceed to the next Indexable
		}

		// If we reach this point we know we are dealing with an IndexableType.Add

		if (indexable.Type != IndexableType.Add)
			throw new Exception ("When I said it was an IndexableType.Add, I meant it!");

		// A null receipt means the indexable was deferred.
		r = AddIndexableToIndex (indexable, primary_writer, ref secondary_writer, prop_change_docs);
		if (r != null)
			receipt_queue.Add (r);
	}

	if (text_cache != null)
		text_cache.CommitTransaction ();

	// On shutdown: clean up everything still pending, close the writers
	// and return null instead of receipts.
	if (Shutdown.ShutdownRequested) {
		foreach (DeferredInfo di in deferred_indexables)
			di.Cleanup ();
		deferred_indexables.Clear ();

		foreach (Indexable indexable in request_indexables)
			indexable.Cleanup ();

		primary_writer.Close ();
		if (secondary_writer != null)
			secondary_writer.Close ();

		return null;
	}

	// Step #3: Optimize both indexes if the request asked for it.
	if (request.OptimizeIndex) {
		Stopwatch watch = new Stopwatch ();
		Logger.Log.Debug ("Optimizing {0}", IndexName);
		watch.Start ();
		primary_writer.Optimize ();
		if (secondary_writer == null)
			secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
		secondary_writer.Optimize ();
		watch.Stop ();
		Logger.Log.Debug ("{0} optimized in {1}", IndexName, watch);
	}

	// Step #4. Close our writers and return the events to
	// indicate what has happened.

	primary_writer.Close ();
	if (secondary_writer != null)
		secondary_writer.Close ();

	// Send a single IndexerIndexablesReceipt if there were deferred indexables
	if (deferred_indexables.Count > 0) {
		Log.Debug ("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
		IndexerIndexablesReceipt r = new IndexerIndexablesReceipt ();
		receipt_queue.Add (r);
	}

	IndexerReceipt [] receipt_array;
	receipt_array = new IndexerReceipt [receipt_queue.Count];
	for (int i = 0; i < receipt_queue.Count; ++i)
		receipt_array [i] = (IndexerReceipt) receipt_queue [i];

	return receipt_array;
}
// Runs the base hook, then writes any text held in indexable_text_cache
// for this indexable out through TextCache.UserCache and drops the
// in-memory entry. Always returns the indexable's own Uri.
protected override Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	base.PostAddHook (indexable, receipt);

	Uri uri = indexable.Uri;

	// Nothing was cached for this indexable.
	if (! indexable_text_cache.ContainsKey (uri))
		return uri;

	string cached_text = (string) indexable_text_cache [uri];
	TextCache.UserCache.WriteFromString (uri, cached_text);
	indexable_text_cache.Remove (uri);

	return uri;
}
// Hook invoked with an indexable and the receipt for its add.
//
// When Uris are being remapped, indexables are added to the index with
// their internal Uri attached, so the receipt comes back carrying an
// internal Uri. For change notification to work correctly that Uri has
// to be mapped back to an external one; subclasses override this hook
// to do so.
//
// Returns the remapped Uri.
virtual protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// No remapping by default: the indexable's Uri is used as-is.
	Uri remapped_uri = indexable.Uri;
	return remapped_uri;
}
// Records file attributes for an indexable that has just been added and
// returns the Uri that should be used for change notification.
//
// - Child indexables (ParentUri set) are left alone; their DisplayUri is
//   returned.
// - If the indexable carries an "XmpFilePath", attributes for that xmp
//   file are written as well.
// - For property changes no attributes are written for the file itself;
//   the Uri is remapped to the last known path or to the xmp base file
//   when either is available.
// - Otherwise the file's attributes are written and the file Uri for its
//   path is returned.
override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// We don't have anything to do if we are dealing with a child indexable
	if (indexable.ParentUri != null)
		return indexable.DisplayUri;

	string xmpfile_path = (string) indexable.LocalState ["XmpFilePath"];
	if (xmpfile_path != null) {
		// Get the uid of the xmp file
		string xmp_id_string = (string) indexable.LocalState ["XmpGuid"];
		Guid xmp_id = GuidFu.FromShortString (xmp_id_string);

		FileAttributes xmp_attr;
		xmp_attr = FileAttributesStore.ReadOrCreate (xmpfile_path, xmp_id);
		xmp_attr.Path = xmpfile_path;
		// Potential race here, attr->LastWriteTime should really be the last write
		// time as seen when this indexable was added
		xmp_attr.LastWriteTime = File.GetLastWriteTimeUtc (xmpfile_path);

		// Add filter information, otherwise the xmp file will be indexed on each recrawl
		xmp_attr.FilterName = XmpFile.FilterName;
		xmp_attr.FilterVersion = XmpFile.FilterVersion;

		// Write file attributes for xmp file
		if (Debug)
			Log.Debug ("Writing attributes for xmp {0}({1})", xmpfile_path, xmp_id_string);

		// Report a failed write instead of dropping it silently.
		if (! FileAttributesStore.Write (xmp_attr))
			Logger.Log.Warn ("Couldn't write attributes for {0}", xmpfile_path);

		uid_manager.ForgetNewId (xmpfile_path);
	}

	if (indexable.Type == IndexableType.PropertyChange) {
		// If we were moved, remap to our *old* external Uri
		// to make notification work out properly.  Otherwise,
		// this is an in-place property change and we don't
		// need to do anything.
		Uri remapped_uri = indexable.DisplayUri;

		string last_known_path;
		last_known_path = (string) indexable.LocalState ["LastKnownPath"];

		if (last_known_path != null) {
			remapped_uri = UriFu.PathToFileUri (last_known_path);
			Logger.Log.Debug ("Last known path is {0}", last_known_path);

			// This rename is now in the index, so we no
			// longer need to keep track of the uid in memory.
			uid_manager.ForgetNewId (last_known_path);
		} else if (xmpfile_path != null) {
			// Get the correct uri for notifications
			string basefile_path = (string) indexable.LocalState ["BaseFilePath"];
			remapped_uri = UriFu.PathToFileUri (basefile_path);
		}

		return remapped_uri;
	}

	string path;
	path = (string) indexable.LocalState ["Path"];
	if (Debug)
		Log.Debug ("PostAddHook for {0} ({1})", indexable.Uri, path);

	uid_manager.ForgetNewId (path);

	DirectoryModel parent;
	parent = indexable.LocalState ["Parent"] as DirectoryModel;

	// The parent directory might have run away since we were indexed
	if (parent != null && ! parent.IsAttached)
		return indexable.DisplayUri;

	Guid unique_id;
	unique_id = GuidFu.FromUri (indexable.Uri);

	FileAttributes attr;
	attr = FileAttributesStore.ReadOrCreate (path, unique_id);

	attr.Path = path;
	attr.LastWriteTime = indexable.Timestamp;

	attr.FilterName = receipt.FilterName;
	attr.FilterVersion = receipt.FilterVersion;

	if (indexable.LocalState ["IsWalkable"] != null) {
		string name;
		name = (string) indexable.LocalState ["Name"];

		if (! RegisterDirectory (name, parent, attr))
			return indexable.DisplayUri;
	}

	// Report a failed write instead of dropping it silently; the
	// remapped Uri is still returned either way.
	if (! FileAttributesStore.Write (attr))
		Logger.Log.Warn ("Couldn't write attributes for {0}", path);

	// Return the remapped Uri so that change notification will work properly
	return UriFu.PathToFileUri (path);
}
// If the indexable carries a "MetaFile" FileInfo in its local state,
// deletes that file and removes the indexable's content path from
// pending_files. Returns the indexable's Uri in every case.
protected override Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	FileInfo metadata_file = indexable.LocalState ["MetaFile"] as FileInfo;

	if (metadata_file != null) {
		metadata_file.Delete ();

		// pending_files is only touched while holding its own lock.
		lock (pending_files) {
			pending_files.Remove (indexable.ContentUri.LocalPath);
		}
	}

	return indexable.Uri;
}