Example #1
0
        // After a successful add, update the cached file's stored attributes
        // (mtime and, for unshared files, filter name/version) and return the
        // indexable's uri unchanged.
        override protected Uri PostAddHook(Indexable indexable, IndexerAddedReceipt receipt)
        {
            // Look up the cached info for this file; nothing to do if absent.
            CachedFileInfo info = file_info_cache [indexable.Uri] as CachedFileInfo;

            if (info == null)
                return indexable.Uri;

            file_info_cache.Remove(info.Uri);

            // A file shared across multiple indexables is only tagged once
            // the last indexable referring to it has been indexed.
            if (info.Shared && DecrementReferenceCount(info.Path))
                return indexable.Uri;

            // The file is known to be indexed successfully, so bring its
            // stored attributes up to date.
            FileAttributes attr = FileAttributesStore.ReadOrCreate(info.Path);

            attr.LastWriteTime = info.Mtime;

            // Filter information is only recorded when exactly one indexable
            // was created from the file.
            if (!info.Shared)
            {
                attr.FilterName    = receipt.FilterName;
                attr.FilterVersion = receipt.FilterVersion;
            }

            if (!FileAttributesStore.Write(attr))
                Logger.Log.Warn("Couldn't write attributes for {0}", info.Path);

            return indexable.Uri;
        }
Example #2
0
 // If we are remapping Uris, indexables are added to the index with the
 // internal Uri attached, thus the receipt will come back with an internal
 // Uri.  For change notification to work correctly, subclasses must map it
 // back to an external Uri here.
 //
 // Returns the remapped uri.
 virtual protected Uri PostAddHook(Indexable indexable, IndexerAddedReceipt receipt)
 {
     // By default no remapping is done: the indexable's uri is returned as-is.
     return indexable.Uri;
 }
        // Filter the indexable and add the resulting document(s) to the index.
        //
        // Returns the IndexerAddedReceipt for this indexable, or null if the
        // indexable was deferred because its filter may generate further
        // indexables (see deferred_indexables handling in the flush methods).
        private IndexerAddedReceipt AddIndexableToIndex(Indexable indexable,
                                                        IndexWriter primary_writer,
                                                        ref IndexWriter secondary_writer,
                                                        Hashtable prop_change_docs)
        {
            Filter filter = null;

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, null);                  // We don't know what filter yet.
            }

            // If we have content, try to find a filter
            // we can use to process the indexable.
            bool filter_content = false;

            try {
                filter_content = FilterFactory.FilterIndexable(indexable, (disable_textcache ? null : text_cache), out filter);
            } catch (Exception e) {
                // Filtering is best-effort: on failure we fall through and
                // index the item without content, but log the error instead
                // of silently swallowing it.
                Log.Error(e, "Error while filtering {0}", indexable.DisplayUri);
            }

            if (!filter_content)
            {
                indexable.NoContent = true;
                filter = null;
            }

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(indexable.DisplayUri, indexable.ContentUri, filter);                  // Update with our filter
            }

            IndexerAddedReceipt r = new IndexerAddedReceipt(indexable.Id);

            if (filter != null)
            {
                if (filter.HasGeneratedIndexable)
                {
                    Log.Debug("{0} might generate indexables from {1}; deferring until later",
                              indexable.DisplayUri,
                              filter.GetType().ToString());

                    // This indexable can potentially generate indexables,
                    // so defer its indexing.

                    DeferredInfo di;
                    di = new DeferredInfo(indexable, filter, prop_change_docs);
                    deferred_indexables.Insert(0, di);

                    // Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
                    // FIXME: Make sure all indexable.Cleanup is called for all indexables if
                    // shutdown is signalled.
                    if (FileFilterNotifier != null)
                    {
                        FileFilterNotifier(null, null, null);                          // reset
                    }

                    // Return null to signal the indexable was deferred
                    return null;
                }

                // Force the clean-up of temporary files, just in case.
                // FIXME: I am not sure if the cleanup should happen now.
                // What is the difference between filter.Cleanup and Indexable.Cleanup ?
                filter.Cleanup();

                r.FilterName    = filter.GetType().ToString();
                r.FilterVersion = filter.Version;
            }

            // If this indexable is not deferred, add it to the index.
            if (indexable.DisplayUri != indexable.ContentUri)
            {
                Log.Debug("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
            }
            else
            {
                Log.Debug("+{0}", indexable.DisplayUri);
            }

            // Carry forward any persistent properties saved from the old
            // version of this document.
            Document persistent_prop_doc = null;

            if (prop_change_docs != null)
            {
                persistent_prop_doc = (Document)prop_change_docs [indexable.Uri];
            }

            AddDocumentToIndex(indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

            if (FileFilterNotifier != null)
            {
                FileFilterNotifier(null, null, null);                  // reset
            }

            // Clean up any temporary files associated with filtering this indexable.
            indexable.Cleanup();

            // Remove any existing text cache for this item
            if (disable_textcache && text_cache != null)
            {
                text_cache.Delete(indexable.Uri);
            }

            return r;
        }
        // Continue indexing the deferred indexables (those whose filters can
        // generate further indexables), draining the deferred_indexables
        // stack until it is empty, a shutdown is requested, or more than
        // RequestFlushThreshold items have been indexed in this pass.
        //
        // Returns the receipts for the submitted (non-generated) indexables,
        // plus a single IndexerIndexablesReceipt if work remains; returns
        // null if a shutdown interrupted the flush.  Caller must hold the
        // appropriate lock (hence the _Unlocked suffix).
        private IndexerReceipt [] FlushGeneratedIndexables_Unlocked(IndexerRequest request)
        {
            int       num_indexed = 0;
            ArrayList receipt_queue;

            receipt_queue = new ArrayList();

            if (text_cache != null)
            {
                text_cache.BeginTransaction();
            }

            IndexWriter primary_writer, secondary_writer;

            primary_writer   = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
            // The secondary writer is created lazily (by AddDocumentToIndex)
            // only if a secondary document is actually needed.
            secondary_writer = null;
            IndexerAddedReceipt r;

            Log.Debug("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

            // Access using index so that we can add more deferred_indexable at the front
            // deferred_indexables are added at the front and fetched from the front like a stack
            while (deferred_indexables.Count > 0)
            {
                DeferredInfo di = (DeferredInfo)deferred_indexables [0];

                // "HasNextIndexable" is stored (as boxed false, which is still
                // non-null) once the filter has run out of generated
                // indexables; at that point the parent itself is ready to index.
                if (di.Indexable.LocalState ["HasNextIndexable"] != null)
                {
                    // Finally, good to index
                    // Should we do a sanity check ? deferred_indexables [0] =?= di
                    deferred_indexables.RemoveAt(0);

                    // Carry forward any persistent properties from the old
                    // version of this document.
                    Document persistent_prop_doc = null;
                    if (di.PersistentPropDocs != null)
                    {
                        persistent_prop_doc = (Document)di.PersistentPropDocs [di.Indexable.Uri];
                    }

                    if (di.Indexable.DisplayUri != di.Indexable.ContentUri)
                    {
                        Log.Debug("+{0} ({1}) [deferred]", di.Indexable.DisplayUri, di.Indexable.ContentUri);
                    }
                    else
                    {
                        Log.Debug("+{0} [deferred]", di.Indexable.DisplayUri);
                    }

                    AddDocumentToIndex(di.Indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

                    // Add the receipt if the indexable was submitted and not generated
                    if (di.Indexable.LocalState ["GeneratedIndexable"] == null)
                    {
                        r               = new IndexerAddedReceipt(di.Indexable.Id);
                        r.FilterName    = di.Filter.GetType().ToString();
                        r.FilterVersion = di.Filter.Version;
                        receipt_queue.Add(r);
                    }

                    // Cleanup, and text cache maintenance.
                    di.Cleanup();

                    if (disable_textcache && text_cache != null)
                    {
                        text_cache.Delete(di.Indexable.Uri);
                    }

                    num_indexed++;
                    continue;
                }

                // The filter for this indexable may still produce generated
                // indexables; pull them out one at a time and index them.
                Log.Debug("Processing deferred indexable from {0}", di.Indexable.DisplayUri);
                bool next = false;
                while (!next && !Shutdown.ShutdownRequested && num_indexed <= RequestFlushThreshold)
                {
                    Indexable generated_indexable = null;

                    bool next_indexable = false;
                    try {
                        next_indexable = di.Filter.GenerateNextIndexable(out generated_indexable);
                    } catch (Exception e) {
                        Log.Error(e, "Error while generating next indexable from {0}", di.Indexable.DisplayUri);
                    }

                    if (!next_indexable)
                    {
                        // Mark it for indexing and leave it in the stack
                        di.Indexable.LocalState ["HasNextIndexable"] = false;
                        next = true;
                        break;
                    }

                    // The filter may legitimately return true with no
                    // indexable produced this round; just try again.
                    if (generated_indexable == null)
                    {
                        continue;
                    }

                    Log.Debug("Adding generated indexable {0}", generated_indexable.DisplayUri);

                    // Mark this indexable so that no receipt is queued for it
                    // when it is finally indexed (see above).
                    generated_indexable.LocalState ["GeneratedIndexable"] = true;

                    // IndexerGenerated indexables have a common parenturi, which has been used before
                    // to remove all docs from the lucene index with that parenturi. So, now we can safely
                    // go ahead and just add the new information.
                    r = AddIndexableToIndex(generated_indexable, primary_writer, ref secondary_writer, di.PersistentPropDocs);
                    // But do not add r to the receipt queue, since this was generated
                    if (r != null)                     // null receipt is returned if generated_indexable is deferred
                    {
                        num_indexed++;
                    }
                }

                if (Shutdown.ShutdownRequested || num_indexed > RequestFlushThreshold)
                {
                    break;
                }
            }

            if (text_cache != null)
            {
                text_cache.CommitTransaction();
            }

            // On shutdown, drop all remaining deferred work, close the
            // writers and signal the interruption by returning null.
            if (Shutdown.ShutdownRequested)
            {
                foreach (DeferredInfo di in deferred_indexables)
                {
                    di.Cleanup();
                }
                deferred_indexables.Clear();

                primary_writer.Close();
                if (secondary_writer != null)
                {
                    secondary_writer.Close();
                }

                return(null);
            }

            primary_writer.Close();
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }

            // Send a single IndexerIndexablesReceipt if there were deferred indexables
            if (deferred_indexables.Count > 0)
            {
                Log.Debug("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
                IndexerIndexablesReceipt paused_receipt = new IndexerIndexablesReceipt();
                receipt_queue.Add(paused_receipt);
            }

            IndexerReceipt [] receipt_array;
            receipt_array = new IndexerReceipt [receipt_queue.Count];
            for (int i = 0; i < receipt_queue.Count; ++i)
            {
                receipt_array [i] = (IndexerReceipt)receipt_queue [i];
            }

            return(receipt_array);
        }
        // Flush the given request against the index in two passes: first
        // delete the affected documents (saving persistent/old properties for
        // re-addition), then add the new documents.  Returns the receipts
        // describing what happened, or null if a shutdown interrupted the
        // flush.  Caller must hold the appropriate lock (hence _Unlocked).
        private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
        {
            ArrayList receipt_queue;

            receipt_queue = new ArrayList();

            IndexReader primary_reader, secondary_reader;

            primary_reader   = IndexReader.Open(PrimaryStore);
            secondary_reader = IndexReader.Open(SecondaryStore);

            // Step #1: Make our first pass over the list of
            // indexables that make up our request.  For each add
            // or property change in the request, get the Lucene
            // documents so we can move forward any persistent
            // properties (for adds) or all old properties (for
            // property changes).
            //
            // Then, for each add or remove in the request,
            // delete the associated documents from the index.
            // Note that we previously cached added documents so
            // that we can move persistent properties forward.

            // parent_child_old_props is double-nested hashtable (depth-2 tree)
            // indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
            // FIXME: 2-level hashtable is a waste for any non-child document.
            // Replace this by a better data structure.
            Hashtable parent_child_old_props = UriFu.NewHashtable();
            TermDocs  term_docs    = secondary_reader.TermDocs();
            int       delete_count = 0;

            IEnumerable request_indexables = request.Indexables;

            foreach (Indexable indexable in request_indexables)
            {
                string uri_str = UriFu.UriToEscapedString(indexable.Uri);
                Term   term;

                // Store the necessary properties from old documents for re-addition
                if (indexable.Type == IndexableType.Add ||
                    indexable.Type == IndexableType.PropertyChange)
                {
                    term = new Term("Uri", uri_str);
                    term_docs.Seek(term);

                    Hashtable this_parent_child_props = null;

                    if (term_docs.Next())
                    {
                        this_parent_child_props = UriFu.NewHashtable();
                        this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                        parent_child_old_props [indexable.Uri]  = this_parent_child_props;
                    }

                    term = new Term("ParentUri", uri_str);
                    term_docs.Seek(term);

                    // NOTE(review): if a uri has "ParentUri" matches but no
                    // "Uri" document of its own, this_parent_child_props is
                    // still null inside this loop and the assignment below
                    // would throw NullReferenceException -- confirm that a
                    // child document always implies a parent document.
                    while (term_docs.Next())
                    {
                        Document doc = secondary_reader.Document(term_docs.Doc());

                        string child_uri_str = doc.Get("Uri");
                        Uri    child_uri     = UriFu.EscapedStringToUri(child_uri_str);
                        // Any valid lucene document *should* have a Uri, so no need to check for null
                        // Store the child documents too, to save persistent-properties
                        // of child documents
                        this_parent_child_props [child_uri] = doc;
                    }
                }

                // Now remove (non-remove indexables will be re-added in next block)
                Logger.Log.Debug("-{0}", indexable.DisplayUri);

                int num_delete = 0;

                term = new Term("Uri", uri_str);
                // For property changes, only secondary index is modified
                secondary_reader.DeleteDocuments(term);

                // Now remove from everywhere else (if asked to remove or if asked to add, in which case
                // we first remove and then add)
                // So we also need to remove child documents
                if (indexable.Type != IndexableType.PropertyChange)
                {
                    num_delete = primary_reader.DeleteDocuments(term);

                    // When we delete an indexable, also delete any children.
                    // FIXME: Shouldn't we also delete any children of children, etc.?
                    term        = new Term("ParentUri", uri_str);
                    num_delete += primary_reader.DeleteDocuments(term);
                    secondary_reader.DeleteDocuments(term);
                }

                // If this is a strict removal (and not a deletion that
                // we are doing in anticipation of adding something back),
                // queue up a removed receipt.
                if (indexable.Type == IndexableType.Remove)
                {
                    IndexerRemovedReceipt r;
                    r            = new IndexerRemovedReceipt(indexable.Id);
                    r.NumRemoved = num_delete;
                    receipt_queue.Add(r);
                }

                delete_count += num_delete;
            }

            term_docs.Close();

            // Keep the cached item count in sync with the deletions; if we
            // don't have a count yet, recompute it from the primary index.
            if (HaveItemCount)
            {
                AdjustItemCount(-delete_count);
            }
            else
            {
                SetItemCount(primary_reader);
            }

            // We are now done with the readers, so we close them.
            // And also free them. Somehow not freeing them is preventing them from
            // GCed at all.
            primary_reader.Close();
            primary_reader = null;
            secondary_reader.Close();
            secondary_reader = null;

            // FIXME: If we crash at exactly this point, we are in
            // trouble.  Items will have been dropped from the index
            // without the proper replacements being added.  We can
            // hopefully fix this when we move to Lucene 2.1.

            // Step #2: Make another pass across our list of indexables
            // and write out any new documents.

            if (text_cache != null)
            {
                text_cache.BeginTransaction();
            }

            IndexWriter primary_writer, secondary_writer;

            // FIXME: Lock obtain time-out can happen here; if that happens,
            // an exception will be thrown and this method will break in the middle
            // leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
            // methods.
            primary_writer   = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
            secondary_writer = null;

            foreach (Indexable indexable in request_indexables)
            {
                // If shutdown has been started, break here
                // FIXME: Some more processing will continue, a lot of them
                // concerning receipts, but the daemon will anyway ignore receipts
                // now, what is the fastest way to stop from here ?
                if (Shutdown.ShutdownRequested)
                {
                    Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
                    break;
                }

                // Receipts for removes were generated in the
                // previous block.  Now we just have to remove
                // items from the text cache.
                if (indexable.Type == IndexableType.Remove)
                {
                    if (text_cache != null)
                    {
                        text_cache.Delete(indexable.Uri);
                    }

                    continue;
                }

                IndexerAddedReceipt r;
                Hashtable           prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

                if (indexable.Type == IndexableType.PropertyChange)
                {
                    Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

                    r = new IndexerAddedReceipt(indexable.Id);
                    r.PropertyChangesOnly = true;
                    receipt_queue.Add(r);

                    // Rewrite the saved document with the changed properties
                    // and write it back out to the secondary index.
                    Document doc;
                    if (prop_change_docs == null)
                    {
                        doc = null;
                    }
                    else
                    {
                        doc = (Document)prop_change_docs [indexable.Uri];
                    }

                    Document new_doc;
                    new_doc = RewriteDocument(doc, indexable);

                    // Write out the new document...
                    if (secondary_writer == null)
                    {
                        secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                    }
                    secondary_writer.AddDocument(new_doc);

                    // Get child property change indexables...
                    ArrayList prop_change_indexables;
                    prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
                    // and store them; no need to delete them first, since they were already removed from the index
                    if (prop_change_indexables == null)
                    {
                        continue;
                    }

                    foreach (Indexable prop_change_indexable in prop_change_indexables)
                    {
                        Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                        doc     = (Document)prop_change_docs [prop_change_indexable.Uri];
                        new_doc = RewriteDocument(doc, prop_change_indexable);
                        secondary_writer.AddDocument(new_doc);
                    }

                    continue;                     // ...and proceed to the next Indexable
                }

                // If we reach this point we know we are dealing with an IndexableType.Add

                if (indexable.Type != IndexableType.Add)
                {
                    throw new Exception("When I said it was an IndexableType.Add, I meant it!");
                }

                r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
                if (r != null)
                {
                    receipt_queue.Add(r);
                }
            }

            if (text_cache != null)
            {
                text_cache.CommitTransaction();
            }

            // On shutdown, drop all deferred work, clean up the request's
            // indexables, close the writers and signal the interruption by
            // returning null.
            if (Shutdown.ShutdownRequested)
            {
                foreach (DeferredInfo di in deferred_indexables)
                {
                    di.Cleanup();
                }
                deferred_indexables.Clear();

                foreach (Indexable indexable in request_indexables)
                {
                    indexable.Cleanup();
                }

                primary_writer.Close();
                if (secondary_writer != null)
                {
                    secondary_writer.Close();
                }

                return(null);
            }

            if (request.OptimizeIndex)
            {
                Stopwatch watch = new Stopwatch();
                Logger.Log.Debug("Optimizing {0}", IndexName);
                watch.Start();
                primary_writer.Optimize();
                if (secondary_writer == null)
                {
                    secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                }
                secondary_writer.Optimize();
                watch.Stop();
                Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
            }

            // Step #4. Close our writers and return the events to
            // indicate what has happened.

            primary_writer.Close();
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }

            // Send a single IndexerIndexablesReceipt if there were deferred indexables
            if (deferred_indexables.Count > 0)
            {
                Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
                IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
                receipt_queue.Add(r);
            }

            IndexerReceipt [] receipt_array;
            receipt_array = new IndexerReceipt [receipt_queue.Count];
            for (int i = 0; i < receipt_queue.Count; ++i)
            {
                receipt_array [i] = (IndexerReceipt)receipt_queue [i];
            }

            return(receipt_array);
        }
Example #6
0
		// After a successful add, update the cached file's stored attributes
		// (mtime and, for unshared files, filter name/version) and return the
		// indexable's uri unchanged.
		override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// Pull the cached info for this file; bail out if we have none.
			CachedFileInfo info = file_info_cache [indexable.Uri] as CachedFileInfo;
			if (info == null)
				return indexable.Uri;

			file_info_cache.Remove (info.Uri);

			// A file shared across multiple indexables is only tagged once
			// its last indexable has been indexed.
			if (info.Shared && DecrementReferenceCount (info.Path))
				return indexable.Uri;

			// The file is known to be indexed successfully, so bring its
			// stored attributes up to date.
			FileAttributes attr = FileAttributesStore.ReadOrCreate (info.Path);
			attr.LastWriteTime = info.Mtime;

			// Filter information is only recorded when exactly one indexable
			// was created from the file.
			if (! info.Shared) {
				attr.FilterName = receipt.FilterName;
				attr.FilterVersion = receipt.FilterVersion;
			}

			if (! FileAttributesStore.Write (attr))
				Logger.Log.Warn ("Couldn't write attributes for {0}", info.Path);

			return indexable.Uri;
		}
Example #7
0
		// After a successful add, persist the note's text into the text
		// cache and drop it from our in-memory cache.
		override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			base.PostAddHook (indexable, receipt);

			// Writing from the PostAddHook guarantees the TextCache is
			// not touched until the note is known to have been indexed.
			string text = (string) note_text_cache [indexable.Uri];

			// When text is null this is equivalent to calling
			// Delete (receipt.Uri).
			TextCache.UserCache.WriteFromString (indexable.Uri, text);
			note_text_cache.Remove (indexable.Uri);

			return indexable.Uri;
		}
Example #8
0
        // This is mostly a copy of LuceneQueryable.Flush + FSQ.PostAddHooks/PostRemoveHook
        //
        // Flushes the pending request through the indexer and applies the
        // resulting receipts to the file-attributes store.  Returns true if
        // some indexable generated more indexables and indexing should be
        // rescheduled; false when there is nothing (more) to do or a
        // shutdown interrupted the flush.
        static bool FlushIndexer(IIndexer indexer)
        {
            IndexerRequest flushed_request;

            if (pending_request.IsEmpty)
            {
                return(false);
            }

            // Swap in a fresh pending request so new work can accumulate
            // while this one is flushed.
            flushed_request = pending_request;
            pending_request = new IndexerRequest();

            IndexerReceipt [] receipts;
            receipts = indexer.Flush(flushed_request);

            // Flush will return null if it encounters a shutdown during flushing
            if (receipts == null)
            {
                return(false);
            }

            fa_store.BeginTransaction();
            bool indexer_indexable_receipt = false;

            foreach (IndexerReceipt raw_r in receipts)
            {
                if (raw_r is IndexerAddedReceipt)
                {
                    // Update the file attributes
                    IndexerAddedReceipt r = (IndexerAddedReceipt)raw_r;

                    Indexable indexable = flushed_request.RetrieveRequestIndexable(r);

                    if (indexable == null)
                    {
                        Logger.Log.Debug("Should not happen! Previously requested indexable with id #{0} has eloped!", r.Id);
                        continue;
                    }

                    // We don't need to write out any file attributes for
                    // children.
                    if (indexable.ParentUri != null)
                    {
                        continue;
                    }

                    string path = indexable.Uri.LocalPath;

                    FileAttributes attr;
                    attr = fa_store.ReadOrCreate(path);

                    attr.LastWriteTime = indexable.Timestamp;
                    attr.FilterName    = r.FilterName;
                    attr.FilterVersion = r.FilterVersion;

                    fa_store.Write(attr);
                }
                else if (raw_r is IndexerRemovedReceipt)
                {
                    // Update the file attributes
                    IndexerRemovedReceipt r = (IndexerRemovedReceipt)raw_r;

                    Indexable indexable = flushed_request.RetrieveRequestIndexable(r);
                    if (indexable == null)                       // Should never happen
                    {
                        Log.Warn("Unable to match indexable-remove #{0} to any request!", r.Id);
                        continue;
                    }

                    string path = indexable.Uri.LocalPath;
                    Logger.Log.Debug("Removing: '{0}'", path);
                    fa_store.Drop(path);
                }
                else if (raw_r is IndexerIndexablesReceipt)
                {
                    // The indexer deferred some indexables; remember to
                    // reschedule below.
                    indexer_indexable_receipt = true;
                }
            }

            pending_request.DeferredIndexables = flushed_request.DeferredIndexables;

            // Reschedule if some indexable generated more indexables
            // NOTE(review): this early return skips fa_store.CommitTransaction(),
            // leaving the transaction open until a later flush commits it --
            // presumably intentional so attributes commit only once indexing
            // finishes, but confirm fa_store tolerates a repeated
            // BeginTransaction() on the next call.
            if (indexer_indexable_receipt)
            {
                pending_request.ContinueIndexing = true;
                return(true);
            }

            fa_store.CommitTransaction();
            return(false);
        }
		// Continue indexing deferred indexables: indexables whose filters can
		// generate further (child) indexables were pushed onto the front of the
		// deferred_indexables stack by AddIndexableToIndex.  Pump each filter for
		// its generated children, index them, and finally index the parent itself
		// once generation is exhausted.  Returns the receipts for submitted (not
		// generated) indexables, or null if shutdown interrupted the flush.
		private IndexerReceipt [] FlushGeneratedIndexables_Unlocked (IndexerRequest request)
		{
			int num_indexed = 0;
			ArrayList receipt_queue;
			receipt_queue = new ArrayList ();

			if (text_cache != null)
				text_cache.BeginTransaction ();
				
			IndexWriter primary_writer, secondary_writer;
			primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
			// The secondary writer is opened lazily (by ref) only when needed.
			secondary_writer = null;
			IndexerAddedReceipt r;

			Log.Debug ("Continuing indexing generated indexables from {0} indexables", deferred_indexables.Count);

			// Access using index so that we can add more deferred_indexable at the front
			// deferred_indexables are added at the front and fetched from the front like a stack
			while (deferred_indexables.Count > 0) {
				DeferredInfo di = (DeferredInfo) deferred_indexables [0];

				// NOTE: only the *presence* of the "HasNextIndexable" key matters
				// here.  The value stored below is boolean false, which is still
				// non-null; non-null means generation is done and the parent
				// itself is ready to be indexed.
				if (di.Indexable.LocalState ["HasNextIndexable"] != null) {
					// Finally, good to index
					// Should we do a sanity check ? deferred_indexables [0] =?= di
					deferred_indexables.RemoveAt (0);

					Document persistent_prop_doc = null;
					if (di.PersistentPropDocs != null)
						persistent_prop_doc = (Document) di.PersistentPropDocs [di.Indexable.Uri];
					
					if (di.Indexable.DisplayUri != di.Indexable.ContentUri)
						Log.Debug ("+{0} ({1}) [deferred]", di.Indexable.DisplayUri, di.Indexable.ContentUri);
					else
						Log.Debug ("+{0} [deferred]", di.Indexable.DisplayUri);

					AddDocumentToIndex (di.Indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

					// Add the receipt if the indexable was submitted and not generated
					if (di.Indexable.LocalState ["GeneratedIndexable"] == null) {
						r = new IndexerAddedReceipt (di.Indexable.Id);
						r.FilterName = di.Filter.GetType ().ToString ();
						r.FilterVersion = di.Filter.Version;
						receipt_queue.Add (r);
					}

					// Cleanup, and text cache maintenance.
					di.Cleanup ();

					if (disable_textcache && text_cache != null)
						text_cache.Delete (di.Indexable.Uri);

					num_indexed ++;
					continue;
				}

				Log.Debug ("Processing deferred indexable from {0}", di.Indexable.DisplayUri);
				bool next = false;
				// Pump the filter for generated indexables until it runs dry,
				// the flush threshold is reached, or shutdown is requested.
				while (! next && ! Shutdown.ShutdownRequested && num_indexed <= RequestFlushThreshold) {
					Indexable generated_indexable = null;

					bool next_indexable = false;
					try {
						next_indexable = di.Filter.GenerateNextIndexable (out generated_indexable);
					} catch (Exception e) {
						Log.Error (e, "Error while generating next indexable from {0}", di.Indexable.DisplayUri);
					}

					if (! next_indexable) {
						// Mark it for indexing and leave it in the stack
						di.Indexable.LocalState ["HasNextIndexable"] = false;
						next = true;
						break;
					}

					// A filter may legitimately return true with no indexable;
					// just ask again.
					if (generated_indexable == null)
						continue;

					Log.Debug ("Adding generated indexable {0}", generated_indexable.DisplayUri);

					// Mark this indexable
					generated_indexable.LocalState ["GeneratedIndexable"] = true;

					// IndexerGenerated indexables have a common parenturi, which has been used before
					// to remove all docs from the lucene index with that parenturi. So, now we can safely
					// go ahead and just add the new information.
					r = AddIndexableToIndex (generated_indexable, primary_writer, ref secondary_writer, di.PersistentPropDocs);
					// But do not add r to the receipt queue, since this was generated
					if (r != null) // null receipt is returned if generated_indexable is deferred
						num_indexed ++;
				}

				if (Shutdown.ShutdownRequested || num_indexed > RequestFlushThreshold)
					break;
			}

			if (text_cache != null)
				text_cache.CommitTransaction ();

			if (Shutdown.ShutdownRequested) {
				// Shutting down: discard all deferred state and bail out;
				// the daemon ignores receipts at this point anyway.
				foreach (DeferredInfo di in deferred_indexables)
					di.Cleanup ();
				deferred_indexables.Clear ();

				primary_writer.Close ();
				if (secondary_writer != null)
					secondary_writer.Close ();
			
				return null;
			}

			primary_writer.Close ();
			if (secondary_writer != null)
				secondary_writer.Close ();
			
			// Send a single IndexerIndexablesReceipt if there were deferred indexables
			if (deferred_indexables.Count > 0) {
				Log.Debug ("{0} more indexable-generating indexable remainding to index; asking daemon to schedule their indexing.", deferred_indexables.Count);
				IndexerIndexablesReceipt paused_receipt = new IndexerIndexablesReceipt ();
				receipt_queue.Add (paused_receipt);
			}

			IndexerReceipt [] receipt_array;
			receipt_array = new IndexerReceipt [receipt_queue.Count];
			for (int i = 0; i < receipt_queue.Count; ++i)
				receipt_array [i] = (IndexerReceipt) receipt_queue [i];
			
			return receipt_array;
		}
Пример #10
0
		// Filter an indexable and add the resulting document(s) to the index.
		// If the chosen filter may generate child indexables, the indexable is
		// pushed onto the deferred_indexables stack instead and null is returned;
		// otherwise the IndexerAddedReceipt for the indexable is returned.
		private IndexerAddedReceipt AddIndexableToIndex (Indexable indexable,
								 IndexWriter primary_writer,
								 ref IndexWriter secondary_writer,
								 Hashtable prop_change_docs)
		{
			Filter filter = null;
			if (FileFilterNotifier != null)
				FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, null); // We don't know what filter yet.

			// If we have content, try to find a filter
			// we can use to process the indexable
			bool filter_content = false;
			try {
				filter_content = FilterFactory.FilterIndexable (indexable, (disable_textcache ? null : text_cache), out filter);
			} catch (Exception e) {
				// Filtering is best-effort: fall back to indexing without
				// content below, but log the failure instead of silently
				// swallowing it (matches the error handling used elsewhere
				// in this file).
				Log.Error (e, "Error while filtering {0}", indexable.DisplayUri);
			}

			if (! filter_content) {
				indexable.NoContent = true;
				filter = null;
			}

			if (FileFilterNotifier != null)
				FileFilterNotifier (indexable.DisplayUri, indexable.ContentUri, filter); // Update with our filter

			IndexerAddedReceipt r = new IndexerAddedReceipt (indexable.Id);

			if (filter != null) {
				if (filter.HasGeneratedIndexable) {
					Log.Debug ("{0} might generate indexables from {1}; deferring until later",
						   indexable.DisplayUri,
						   filter.GetType ().ToString ());

					// This indexable can potentially generate indexables,
					// so defer its indexing

					DeferredInfo di;
					di = new DeferredInfo (indexable, filter, prop_change_docs);
					deferred_indexables.Insert (0, di);

					// Since we are deferred, continue. Do not cleanup indexable or remove text-cache yet.
					// FIXME: Make sure all indexable.Cleanup is called for all indexables if
					// shutdown is signalled.
					if (FileFilterNotifier != null)
						FileFilterNotifier (null, null, null); // reset

					// Return null to signal the indexable was deferred
					return null;
				}

				// Force the clean-up of temporary files, just in case.
				// FIXME: I am not sure if the cleanup should happen now.
				// What is the difference between filter.Cleanup and Indexable.Cleanup ?
				filter.Cleanup ();

				r.FilterName = filter.GetType ().ToString ();
				r.FilterVersion = filter.Version;
			}

			// If this indexables is not deferred, add it to the index.
			if (indexable.DisplayUri != indexable.ContentUri)
				Log.Debug ("+{0} ({1})", indexable.DisplayUri, indexable.ContentUri);
			else
				Log.Debug ("+{0}", indexable.DisplayUri);

			// Carry any persistent properties from the old document forward.
			Document persistent_prop_doc = null;
			if (prop_change_docs != null)
				persistent_prop_doc = (Document) prop_change_docs [indexable.Uri];
			AddDocumentToIndex (indexable, persistent_prop_doc, primary_writer, ref secondary_writer);

			if (FileFilterNotifier != null)
				FileFilterNotifier (null, null, null); // reset

			// Clean up any temporary files associated with filtering this indexable.
			indexable.Cleanup ();

			// Remove any existing text cache for this item
			if (disable_textcache && text_cache != null)
				text_cache.Delete (indexable.Uri);

			return r;
		}
Пример #11
0
		// Flush a request in two passes: pass 1 collects the existing Lucene
		// documents (so persistent properties can be carried forward) and deletes
		// the old documents for every add/remove; pass 2 writes out the new
		// documents for adds and property changes.  Returns the receipts
		// describing what happened, or null if shutdown interrupted the flush.
		private IndexerReceipt [] Flush_Unlocked (IndexerRequest request)
		{
			ArrayList receipt_queue;
			receipt_queue = new ArrayList ();

			IndexReader primary_reader, secondary_reader;
			primary_reader = IndexReader.Open (PrimaryStore);
			secondary_reader = IndexReader.Open (SecondaryStore);

			// Step #1: Make our first pass over the list of
			// indexables that make up our request.  For each add
			// or property change in the request, get the Lucene
			// documents so we can move forward any persistent
			// properties (for adds) or all old properties (for
			// property changes).
			//
			// Then, for each add or remove in the request,
			// delete the associated documents from the index.
			// Note that we previously cached added documents so
			// that we can move persistent properties forward.

			// parent_child_old_props is double-nested hashtable (depth-2 tree)
			// indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
			// FIXME: 2-level hashtable is a waste for any non-child document.
			// Replace this by a better data structure.
			Hashtable parent_child_old_props = UriFu.NewHashtable ();
			TermDocs term_docs = secondary_reader.TermDocs ();
			int delete_count = 0;

			IEnumerable request_indexables = request.Indexables;

			foreach (Indexable indexable in request_indexables) {

				string uri_str = UriFu.UriToEscapedString (indexable.Uri);
				Term term;

				// Store the necessary properties from old documents for re-addition
				if (indexable.Type == IndexableType.Add ||
				    indexable.Type == IndexableType.PropertyChange) {

					term = new Term ("Uri", uri_str);
					term_docs.Seek (term);

					Hashtable this_parent_child_props = null;

					if (term_docs.Next ()) {
						this_parent_child_props = UriFu.NewHashtable ();
						this_parent_child_props [indexable.Uri] = secondary_reader.Document (term_docs.Doc ());
						parent_child_old_props [indexable.Uri] = this_parent_child_props;
					}

					term = new Term ("ParentUri", uri_str);
					term_docs.Seek (term);

					while (term_docs.Next ()) {
						Document doc = secondary_reader.Document (term_docs.Doc ());

						string child_uri_str = doc.Get ("Uri");
						Uri child_uri = UriFu.EscapedStringToUri (child_uri_str);
						// Any valid lucene document *should* have a Uri, so no need to check for null

						// The parent might have no document of its own in the
						// secondary index, in which case this_parent_child_props
						// was never allocated above; create it on demand so that
						// orphaned child documents do not trigger a
						// NullReferenceException here.
						if (this_parent_child_props == null) {
							this_parent_child_props = UriFu.NewHashtable ();
							parent_child_old_props [indexable.Uri] = this_parent_child_props;
						}

						// Store the child documents too, to save persistent-properties
						// of child documents
						this_parent_child_props [child_uri] = doc;
					}
				}

				// Now remove (non-remove indexables will be re-added in next block)
				Logger.Log.Debug ("-{0}", indexable.DisplayUri);
				
				int num_delete = 0;

				term = new Term ("Uri", uri_str);
				// For property changes, only secondary index is modified
				secondary_reader.DeleteDocuments (term);

				// Now remove from everywhere else (if asked to remove or if asked to add, in which case
				// we first remove and then add)
				// So we also need to remove child documents
				if (indexable.Type != IndexableType.PropertyChange) {
					num_delete = primary_reader.DeleteDocuments (term);

					// When we delete an indexable, also delete any children.
					// FIXME: Shouldn't we also delete any children of children, etc.?
					term = new Term ("ParentUri", uri_str);
					num_delete += primary_reader.DeleteDocuments (term);
					secondary_reader.DeleteDocuments (term);
				}

				// If this is a strict removal (and not a deletion that
				// we are doing in anticipation of adding something back),
				// queue up a removed receipt.
				if (indexable.Type == IndexableType.Remove) {
					IndexerRemovedReceipt r;
					r = new IndexerRemovedReceipt (indexable.Id);
					r.NumRemoved = num_delete;
					receipt_queue.Add (r);
				}

				delete_count += num_delete;
			}

			term_docs.Close ();

			if (HaveItemCount)
				AdjustItemCount (-delete_count);
			else
				SetItemCount (primary_reader);
			
			// We are now done with the readers, so we close them.
			// And also free them. Somehow not freeing them is preventing them from
			// GCed at all.
			primary_reader.Close ();
			primary_reader = null;
			secondary_reader.Close ();
			secondary_reader = null;

			// FIXME: If we crash at exactly this point, we are in
			// trouble.  Items will have been dropped from the index
			// without the proper replacements being added.  We can
			// hopefully fix this when we move to Lucene 2.1.

			// Step #2: Make another pass across our list of indexables
			// and write out any new documents.

			if (text_cache != null)
				text_cache.BeginTransaction ();
				
			IndexWriter primary_writer, secondary_writer;
			// FIXME: Lock obtain time-out can happen here; if that happens,
			// an exception will be thrown and this method will break in the middle
			// leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
			// methods.
			primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
			// The secondary writer is opened lazily, only if something needs it.
			secondary_writer = null;

			foreach (Indexable indexable in request_indexables) {
				// If shutdown has been started, break here
				// FIXME: Some more processing will continue, a lot of them
				// concerning receipts, but the daemon will anyway ignore receipts
				// now, what is the fastest way to stop from here ?
				if (Shutdown.ShutdownRequested) {
					Log.Debug ("Shutdown initiated. Breaking while flushing indexables.");
					break;
				}

				// Receipts for removes were generated in the
				// previous block.  Now we just have to remove
				// items from the text cache.
				if (indexable.Type == IndexableType.Remove) {
					if (text_cache != null)
						text_cache.Delete (indexable.Uri);

					continue;
				}

				IndexerAddedReceipt r;
				Hashtable prop_change_docs = (Hashtable) parent_child_old_props [indexable.Uri];

				if (indexable.Type == IndexableType.PropertyChange) {

					Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);

					r = new IndexerAddedReceipt (indexable.Id);
					r.PropertyChangesOnly = true;
					receipt_queue.Add (r);

					Document doc;
					if (prop_change_docs == null)
						doc = null;
					else
						doc = (Document) prop_change_docs [indexable.Uri];

					Document new_doc;
					new_doc = RewriteDocument (doc, indexable);

					// Write out the new document...
					if (secondary_writer == null)
						secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
					secondary_writer.AddDocument (new_doc);

					// Get child property change indexables...
					ArrayList prop_change_indexables;
					prop_change_indexables = GetChildPropertyChange (prop_change_docs, indexable);
					// and store them; no need to delete them first, since they were already removed from the index
					if (prop_change_indexables == null)
						continue;

					foreach (Indexable prop_change_indexable in prop_change_indexables) {
						Log.Debug ("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
						doc = (Document) prop_change_docs [prop_change_indexable.Uri];
						new_doc = RewriteDocument (doc, prop_change_indexable);
						secondary_writer.AddDocument (new_doc);
					}

					continue; // ...and proceed to the next Indexable
				}

				// If we reach this point we know we are dealing with an IndexableType.Add

				if (indexable.Type != IndexableType.Add)
					throw new Exception ("When I said it was an IndexableType.Add, I meant it!");

				r = AddIndexableToIndex (indexable, primary_writer, ref secondary_writer, prop_change_docs);
				if (r != null)
					receipt_queue.Add (r);
			}

			if (text_cache != null)
				text_cache.CommitTransaction ();

			if (Shutdown.ShutdownRequested) {
				// Shutting down: discard all deferred state, clean up every
				// indexable in the request, close the writers and bail out.
				foreach (DeferredInfo di in deferred_indexables)
					di.Cleanup ();
				deferred_indexables.Clear ();

				foreach (Indexable indexable in request_indexables)
					indexable.Cleanup ();

				primary_writer.Close ();
				if (secondary_writer != null)
					secondary_writer.Close ();

				return null;
			}

			if (request.OptimizeIndex) {
				Stopwatch watch = new Stopwatch ();
				Logger.Log.Debug ("Optimizing {0}", IndexName);
				watch.Start ();
				primary_writer.Optimize ();
				if (secondary_writer == null)
					secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
				secondary_writer.Optimize ();
				watch.Stop ();
				Logger.Log.Debug ("{0} optimized in {1}", IndexName, watch);
			}

			// Step #4. Close our writers and return the events to
			// indicate what has happened.

			primary_writer.Close ();
			if (secondary_writer != null)
				secondary_writer.Close ();

			// Send a single IndexerIndexablesReceipt if there were deferred indexables
			if (deferred_indexables.Count > 0) {
				Log.Debug ("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
				IndexerIndexablesReceipt r = new IndexerIndexablesReceipt ();
				receipt_queue.Add (r);
			}

			IndexerReceipt [] receipt_array;
			receipt_array = new IndexerReceipt [receipt_queue.Count];
			for (int i = 0; i < receipt_queue.Count; ++i)
				receipt_array [i] = (IndexerReceipt) receipt_queue [i];

			return receipt_array;
		}
		protected override Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// Let the base class do its own post-add bookkeeping first.
			base.PostAddHook (indexable, receipt);

			Uri uri = indexable.Uri;

			// If we stashed extracted text for this uri, flush it out to the
			// user text cache and drop the in-memory copy.
			if (indexable_text_cache.ContainsKey (uri)) {
				string cached_text = (string) indexable_text_cache [uri];
				TextCache.UserCache.WriteFromString (uri, cached_text);
				indexable_text_cache.Remove (uri);
			}

			return uri;
		}
Пример #13
0
		// If we are remapping Uris, indexables should be added to the
		// index with the internal Uri attached.  Thus the receipt
		// will come back w/ an internal Uri.  In order for change
		// notification to work correctly, we have to map it to
		// an external Uri.
		// Return the remapped uri.
		virtual protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// By default, remapped uri is the indexable uri
			// (i.e. no remapping); backends that remap override this.
			return indexable.Uri;
		}
Пример #14
0
		// After a successful add, persist file attributes (timestamp, filter
		// info) for the indexed file and any sidecar xmp file, release the
		// in-memory uid bookkeeping, and remap the internal uid uri back to the
		// external file:// uri so that change notification works.
		override protected Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// We don't have anything to do if we are dealing with a child indexable
			if (indexable.ParentUri != null)
				return indexable.DisplayUri;

			// If this indexable carried a sidecar xmp file, write out file
			// attributes for the xmp file too so it isn't re-indexed on recrawl.
			string xmpfile_path = (string) indexable.LocalState ["XmpFilePath"];
			if (xmpfile_path != null) {
				// Get the uid of the xmp file
				string xmp_id_string = (string) indexable.LocalState ["XmpGuid"];
				Guid xmp_id = GuidFu.FromShortString (xmp_id_string);

				FileAttributes xmp_attr;
				xmp_attr = FileAttributesStore.ReadOrCreate (xmpfile_path, xmp_id);
				xmp_attr.Path = xmpfile_path;
				// Potential race here, attr->LastWriteTime should really be the last write
				// time as seen when this indexable was added
				xmp_attr.LastWriteTime = File.GetLastWriteTimeUtc (xmpfile_path);

				// Add filter information, otherwise the xmp file will be indexed on each recrawl
				xmp_attr.FilterName = XmpFile.FilterName;
				xmp_attr.FilterVersion = XmpFile.FilterVersion;

				// Write file attributes for xmp file
				if (Debug)
					Log.Debug ("Writing attributes for xmp {0}({1})", xmpfile_path, xmp_id_string);

				FileAttributesStore.Write (xmp_attr);
				uid_manager.ForgetNewId (xmpfile_path);
			}

			if (indexable.Type == IndexableType.PropertyChange) {
				// If we were moved, remap to our *old* external Uri
				// to make notification work out properly.  Otherwise,
				// this is an in-place property change and we don't
				// need to do anything.

				Uri remapped_uri = indexable.DisplayUri;
				string last_known_path;
				last_known_path = (string) indexable.LocalState ["LastKnownPath"];

				if (last_known_path != null) {
					remapped_uri = UriFu.PathToFileUri (last_known_path);
					Logger.Log.Debug ("Last known path is {0}", last_known_path);

					// This rename is now in the index, so we no
					// longer need to keep track of the uid in memory.
					uid_manager.ForgetNewId (last_known_path);
				} else if (xmpfile_path != null) {
					// Get the correct uri for notifications
					string basefile_path = (string) indexable.LocalState ["BaseFilePath"];
					remapped_uri = UriFu.PathToFileUri (basefile_path);
				}

				return remapped_uri;
			}

			// Plain add: the external path was stashed in LocalState when the
			// indexable was created.
			string path;
			path = (string) indexable.LocalState ["Path"];

			if (Debug)
				Log.Debug ("PostAddHook for {0} ({1})", indexable.Uri, path);

			uid_manager.ForgetNewId (path);

			DirectoryModel parent;
			parent = indexable.LocalState ["Parent"] as DirectoryModel;

			// The parent directory might have run away since we were indexed
			if (parent != null && ! parent.IsAttached)
				return indexable.DisplayUri;

			// The internal uri encodes the file's unique id.
			Guid unique_id;
			unique_id = GuidFu.FromUri (indexable.Uri);

			FileAttributes attr;
			attr = FileAttributesStore.ReadOrCreate (path, unique_id);

			attr.Path = path;
			attr.LastWriteTime = indexable.Timestamp;
			
			attr.FilterName = receipt.FilterName;
			attr.FilterVersion = receipt.FilterVersion;

			// Directories ("IsWalkable") must also be registered for crawling.
			if (indexable.LocalState ["IsWalkable"] != null) {
				string name;
				name = (string) indexable.LocalState ["Name"];

				if (! RegisterDirectory (name, parent, attr))
					return indexable.DisplayUri;
			}

			FileAttributesStore.Write (attr);

			// Return the remapped Uri so that change notification will work properly
			return UriFu.PathToFileUri (path);
		}
		protected override Uri PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// Nothing to clean up unless a meta file was recorded for this item.
			FileInfo meta = indexable.LocalState ["MetaFile"] as FileInfo;

			if (meta != null) {
				// Indexing succeeded, so the temporary meta file is no longer needed.
				meta.Delete ();

				// Release the pending entry for this item's content path.
				lock (pending_files)
					pending_files.Remove (indexable.ContentUri.LocalPath);
			}

			return indexable.Uri;
		}