コード例 #1
0
ファイル: BuildIndex.cs プロジェクト: ArsenShnurkov/beagle-1
        // This is mostly a copy of LuceneQueryable.Flush + FSQ.PostAddHooks/PostRemoveHook
        static bool FlushIndexer(IIndexer indexer)
        {
            IndexerRequest flushed_request;

            if (pending_request.IsEmpty)
            {
                return(false);
            }

            flushed_request = pending_request;
            pending_request = new IndexerRequest();

            IndexerReceipt [] receipts;
            receipts = indexer.Flush(flushed_request);

            // Flush will return null if it encounters a shutdown during flushing
            if (receipts == null)
            {
                return(false);
            }

            fa_store.BeginTransaction();
            bool indexer_indexable_receipt = false;

            foreach (IndexerReceipt raw_r in receipts)
            {
                if (raw_r is IndexerAddedReceipt)
                {
                    // Update the file attributes
                    IndexerAddedReceipt r = (IndexerAddedReceipt)raw_r;

                    Indexable indexable = flushed_request.RetrieveRequestIndexable(r);

                    if (indexable == null)
                    {
                        Logger.Log.Debug("Should not happen! Previously requested indexable with id #{0} has eloped!", r.Id);
                        continue;
                    }

                    // We don't need to write out any file attributes for
                    // children.
                    if (indexable.ParentUri != null)
                    {
                        continue;
                    }

                    string path = indexable.Uri.LocalPath;

                    FileAttributes attr;
                    attr = fa_store.ReadOrCreate(path);

                    attr.LastWriteTime = indexable.Timestamp;
                    attr.FilterName    = r.FilterName;
                    attr.FilterVersion = r.FilterVersion;

                    fa_store.Write(attr);
                }
                else if (raw_r is IndexerRemovedReceipt)
                {
                    // Update the file attributes
                    IndexerRemovedReceipt r = (IndexerRemovedReceipt)raw_r;

                    Indexable indexable = flushed_request.RetrieveRequestIndexable(r);
                    if (indexable == null)                       // Should never happen
                    {
                        Log.Warn("Unable to match indexable-remove #{0} to any request!", r.Id);
                        continue;
                    }

                    string path = indexable.Uri.LocalPath;
                    Logger.Log.Debug("Removing: '{0}'", path);
                    fa_store.Drop(path);
                }
                else if (raw_r is IndexerIndexablesReceipt)
                {
                    indexer_indexable_receipt = true;
                }
            }

            pending_request.DeferredIndexables = flushed_request.DeferredIndexables;

            // Reschedule if some indexable generated more indexables
            if (indexer_indexable_receipt)
            {
                pending_request.ContinueIndexing = true;
                return(true);
            }

            fa_store.CommitTransaction();
            return(false);
        }
コード例 #2
0
        private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
        {
            ArrayList receipt_queue;

            receipt_queue = new ArrayList();

            IndexReader primary_reader, secondary_reader;

            primary_reader   = IndexReader.Open(PrimaryStore);
            secondary_reader = IndexReader.Open(SecondaryStore);

            // Step #1: Make our first pass over the list of
            // indexables that make up our request.  For each add
            // or property change in the request, get the Lucene
            // documents so we can move forward any persistent
            // properties (for adds) or all old properties (for
            // property changes).
            //
            // Then, for each add or remove in the request,
            // delete the associated documents from the index.
            // Note that we previously cached added documents so
            // that we can move persistent properties forward.

            // parent_child_old_props is double-nested hashtable (depth-2 tree)
            // indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
            // FIXME: 2-level hashtable is a waste for any non-child document.
            // Replace this by a better data structure.
            Hashtable parent_child_old_props = UriFu.NewHashtable();
            TermDocs  term_docs    = secondary_reader.TermDocs();
            int       delete_count = 0;

            IEnumerable request_indexables = request.Indexables;

            foreach (Indexable indexable in request_indexables)
            {
                string uri_str = UriFu.UriToEscapedString(indexable.Uri);
                Term   term;

                // Store the necessary properties from old documents for re-addition
                if (indexable.Type == IndexableType.Add ||
                    indexable.Type == IndexableType.PropertyChange)
                {
                    term = new Term("Uri", uri_str);
                    term_docs.Seek(term);

                    Hashtable this_parent_child_props = null;

                    if (term_docs.Next())
                    {
                        this_parent_child_props = UriFu.NewHashtable();
                        this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                        parent_child_old_props [indexable.Uri]  = this_parent_child_props;
                    }

                    term = new Term("ParentUri", uri_str);
                    term_docs.Seek(term);

                    while (term_docs.Next())
                    {
                        Document doc = secondary_reader.Document(term_docs.Doc());

                        string child_uri_str = doc.Get("Uri");
                        Uri    child_uri     = UriFu.EscapedStringToUri(child_uri_str);
                        // Any valid lucene document *should* have a Uri, so no need to check for null
                        // Store the child documents too, to save persistent-properties
                        // of child documents
                        this_parent_child_props [child_uri] = doc;
                    }
                }

                // Now remove (non-remove indexables will be re-added in next block)
                Logger.Log.Debug("-{0}", indexable.DisplayUri);

                int num_delete = 0;

                term = new Term("Uri", uri_str);
                // For property changes, only secondary index is modified
                secondary_reader.DeleteDocuments(term);

                // Now remove from everywhere else (if asked to remove or if asked to add, in which case
                // we first remove and then add)
                // So we also need to remove child documents
                if (indexable.Type != IndexableType.PropertyChange)
                {
                    num_delete = primary_reader.DeleteDocuments(term);

                    // When we delete an indexable, also delete any children.
                    // FIXME: Shouldn't we also delete any children of children, etc.?
                    term        = new Term("ParentUri", uri_str);
                    num_delete += primary_reader.DeleteDocuments(term);
                    secondary_reader.DeleteDocuments(term);
                }

                // If this is a strict removal (and not a deletion that
                // we are doing in anticipation of adding something back),
                // queue up a removed receipt.
                if (indexable.Type == IndexableType.Remove)
                {
                    IndexerRemovedReceipt r;
                    r            = new IndexerRemovedReceipt(indexable.Id);
                    r.NumRemoved = num_delete;
                    receipt_queue.Add(r);
                }

                delete_count += num_delete;
            }

            term_docs.Close();

            if (HaveItemCount)
            {
                AdjustItemCount(-delete_count);
            }
            else
            {
                SetItemCount(primary_reader);
            }

            // We are now done with the readers, so we close them.
            // And also free them. Somehow not freeing them is preventing them from
            // GCed at all.
            primary_reader.Close();
            primary_reader = null;
            secondary_reader.Close();
            secondary_reader = null;

            // FIXME: If we crash at exactly this point, we are in
            // trouble.  Items will have been dropped from the index
            // without the proper replacements being added.  We can
            // hopefully fix this when we move to Lucene 2.1.

            // Step #2: Make another pass across our list of indexables
            // and write out any new documents.

            if (text_cache != null)
            {
                text_cache.BeginTransaction();
            }

            IndexWriter primary_writer, secondary_writer;

            // FIXME: Lock obtain time-out can happen here; if that happens,
            // an exception will be thrown and this method will break in the middle
            // leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
            // methods.
            primary_writer   = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
            secondary_writer = null;

            foreach (Indexable indexable in request_indexables)
            {
                // If shutdown has been started, break here
                // FIXME: Some more processing will continue, a lot of them
                // concerning receipts, but the daemon will anyway ignore receipts
                // now, what is the fastest way to stop from here ?
                if (Shutdown.ShutdownRequested)
                {
                    Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
                    break;
                }

                // Receipts for removes were generated in the
                // previous block.  Now we just have to remove
                // items from the text cache.
                if (indexable.Type == IndexableType.Remove)
                {
                    if (text_cache != null)
                    {
                        text_cache.Delete(indexable.Uri);
                    }

                    continue;
                }

                IndexerAddedReceipt r;
                Hashtable           prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

                if (indexable.Type == IndexableType.PropertyChange)
                {
                    Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

                    r = new IndexerAddedReceipt(indexable.Id);
                    r.PropertyChangesOnly = true;
                    receipt_queue.Add(r);

                    Document doc;
                    if (prop_change_docs == null)
                    {
                        doc = null;
                    }
                    else
                    {
                        doc = (Document)prop_change_docs [indexable.Uri];
                    }

                    Document new_doc;
                    new_doc = RewriteDocument(doc, indexable);

                    // Write out the new document...
                    if (secondary_writer == null)
                    {
                        secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                    }
                    secondary_writer.AddDocument(new_doc);

                    // Get child property change indexables...
                    ArrayList prop_change_indexables;
                    prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
                    // and store them; no need to delete them first, since they were already removed from the index
                    if (prop_change_indexables == null)
                    {
                        continue;
                    }

                    foreach (Indexable prop_change_indexable in prop_change_indexables)
                    {
                        Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                        doc     = (Document)prop_change_docs [prop_change_indexable.Uri];
                        new_doc = RewriteDocument(doc, prop_change_indexable);
                        secondary_writer.AddDocument(new_doc);
                    }

                    continue;                     // ...and proceed to the next Indexable
                }

                // If we reach this point we know we are dealing with an IndexableType.Add

                if (indexable.Type != IndexableType.Add)
                {
                    throw new Exception("When I said it was an IndexableType.Add, I meant it!");
                }

                r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
                if (r != null)
                {
                    receipt_queue.Add(r);
                }
            }

            if (text_cache != null)
            {
                text_cache.CommitTransaction();
            }

            if (Shutdown.ShutdownRequested)
            {
                foreach (DeferredInfo di in deferred_indexables)
                {
                    di.Cleanup();
                }
                deferred_indexables.Clear();

                foreach (Indexable indexable in request_indexables)
                {
                    indexable.Cleanup();
                }

                primary_writer.Close();
                if (secondary_writer != null)
                {
                    secondary_writer.Close();
                }

                return(null);
            }

            if (request.OptimizeIndex)
            {
                Stopwatch watch = new Stopwatch();
                Logger.Log.Debug("Optimizing {0}", IndexName);
                watch.Start();
                primary_writer.Optimize();
                if (secondary_writer == null)
                {
                    secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                }
                secondary_writer.Optimize();
                watch.Stop();
                Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
            }

            // Step #4. Close our writers and return the events to
            // indicate what has happened.

            primary_writer.Close();
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }

            // Send a single IndexerIndexablesReceipt if there were deferred indexables
            if (deferred_indexables.Count > 0)
            {
                Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
                IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
                receipt_queue.Add(r);
            }

            IndexerReceipt [] receipt_array;
            receipt_array = new IndexerReceipt [receipt_queue.Count];
            for (int i = 0; i < receipt_queue.Count; ++i)
            {
                receipt_array [i] = (IndexerReceipt)receipt_queue [i];
            }

            return(receipt_array);
        }
コード例 #3
0
		private IndexerReceipt [] Flush_Unlocked (IndexerRequest request)
		{
			ArrayList receipt_queue;
			receipt_queue = new ArrayList ();

			IndexReader primary_reader, secondary_reader;
			primary_reader = IndexReader.Open (PrimaryStore);
			secondary_reader = IndexReader.Open (SecondaryStore);

			// Step #1: Make our first pass over the list of
			// indexables that make up our request.  For each add
			// or property change in the request, get the Lucene
			// documents so we can move forward any persistent
			// properties (for adds) or all old properties (for
			// property changes).
			//
			// Then, for each add or remove in the request,
			// delete the associated documents from the index.
			// Note that we previously cached added documents so
			// that we can move persistent properties forward.

			// parent_child_old_props is double-nested hashtable (depth-2 tree)
			// indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
			// FIXME: 2-level hashtable is a waste for any non-child document.
			// Replace this by a better data structure.
			Hashtable parent_child_old_props = UriFu.NewHashtable ();
			TermDocs term_docs = secondary_reader.TermDocs ();
			int delete_count = 0;

			IEnumerable request_indexables = request.Indexables;

			foreach (Indexable indexable in request_indexables) {

				string uri_str = UriFu.UriToEscapedString (indexable.Uri);
				Term term;

				// Store the necessary properties from old documents for re-addition
				if (indexable.Type == IndexableType.Add ||
				    indexable.Type == IndexableType.PropertyChange) {

					term = new Term ("Uri", uri_str);
					term_docs.Seek (term);

					Hashtable this_parent_child_props = null;

					if (term_docs.Next ()) {
						this_parent_child_props = UriFu.NewHashtable ();
						this_parent_child_props [indexable.Uri] = secondary_reader.Document (term_docs.Doc ());
						parent_child_old_props [indexable.Uri] = this_parent_child_props;
					}

					term = new Term ("ParentUri", uri_str);
					term_docs.Seek (term);

					while (term_docs.Next ()) {
						Document doc = secondary_reader.Document (term_docs.Doc ());

						string child_uri_str = doc.Get ("Uri");
						Uri child_uri = UriFu.EscapedStringToUri (child_uri_str);
						// Any valid lucene document *should* have a Uri, so no need to check for null
						// Store the child documents too, to save persistent-properties
						// of child documents
						this_parent_child_props [child_uri] = doc;
					}
				}

				// Now remove (non-remove indexables will be re-added in next block)
				Logger.Log.Debug ("-{0}", indexable.DisplayUri);
				
				int num_delete = 0;

				term = new Term ("Uri", uri_str);
				// For property changes, only secondary index is modified
				secondary_reader.DeleteDocuments (term);

				// Now remove from everywhere else (if asked to remove or if asked to add, in which case
				// we first remove and then add)
				// So we also need to remove child documents
				if (indexable.Type != IndexableType.PropertyChange) {
					num_delete = primary_reader.DeleteDocuments (term);

					// When we delete an indexable, also delete any children.
					// FIXME: Shouldn't we also delete any children of children, etc.?
					term = new Term ("ParentUri", uri_str);
					num_delete += primary_reader.DeleteDocuments (term);
					secondary_reader.DeleteDocuments (term);
				}

				// If this is a strict removal (and not a deletion that
				// we are doing in anticipation of adding something back),
				// queue up a removed receipt.
				if (indexable.Type == IndexableType.Remove) {
					IndexerRemovedReceipt r;
					r = new IndexerRemovedReceipt (indexable.Id);
					r.NumRemoved = num_delete;
					receipt_queue.Add (r);
				}

				delete_count += num_delete;
			}

			term_docs.Close ();

			if (HaveItemCount)
				AdjustItemCount (-delete_count);
			else
				SetItemCount (primary_reader);
			
			// We are now done with the readers, so we close them.
			// And also free them. Somehow not freeing them is preventing them from
			// GCed at all.
			primary_reader.Close ();
			primary_reader = null;
			secondary_reader.Close ();
			secondary_reader = null;

			// FIXME: If we crash at exactly this point, we are in
			// trouble.  Items will have been dropped from the index
			// without the proper replacements being added.  We can
			// hopefully fix this when we move to Lucene 2.1.

			// Step #2: Make another pass across our list of indexables
			// and write out any new documents.

			if (text_cache != null)
				text_cache.BeginTransaction ();
				
			IndexWriter primary_writer, secondary_writer;
			// FIXME: Lock obtain time-out can happen here; if that happens,
			// an exception will be thrown and this method will break in the middle
			// leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
			// methods.
			primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
			secondary_writer = null;

			foreach (Indexable indexable in request_indexables) {
				// If shutdown has been started, break here
				// FIXME: Some more processing will continue, a lot of them
				// concerning receipts, but the daemon will anyway ignore receipts
				// now, what is the fastest way to stop from here ?
				if (Shutdown.ShutdownRequested) {
					Log.Debug ("Shutdown initiated. Breaking while flushing indexables.");
					break;
				}

				// Receipts for removes were generated in the
				// previous block.  Now we just have to remove
				// items from the text cache.
				if (indexable.Type == IndexableType.Remove) {
					if (text_cache != null)
						text_cache.Delete (indexable.Uri);

					continue;
				}

				IndexerAddedReceipt r;
				Hashtable prop_change_docs = (Hashtable) parent_child_old_props [indexable.Uri];

				if (indexable.Type == IndexableType.PropertyChange) {

					Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);

					r = new IndexerAddedReceipt (indexable.Id);
					r.PropertyChangesOnly = true;
					receipt_queue.Add (r);

					Document doc;
					if (prop_change_docs == null)
						doc = null;
					else
						doc = (Document) prop_change_docs [indexable.Uri];

					Document new_doc;
					new_doc = RewriteDocument (doc, indexable);

					// Write out the new document...
					if (secondary_writer == null)
						secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
					secondary_writer.AddDocument (new_doc);

					// Get child property change indexables...
					ArrayList prop_change_indexables;
					prop_change_indexables = GetChildPropertyChange (prop_change_docs, indexable);
					// and store them; no need to delete them first, since they were already removed from the index
					if (prop_change_indexables == null)
						continue;

					foreach (Indexable prop_change_indexable in prop_change_indexables) {
						Log.Debug ("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
						doc = (Document) prop_change_docs [prop_change_indexable.Uri];
						new_doc = RewriteDocument (doc, prop_change_indexable);
						secondary_writer.AddDocument (new_doc);
					}

					continue; // ...and proceed to the next Indexable
				}

				// If we reach this point we know we are dealing with an IndexableType.Add

				if (indexable.Type != IndexableType.Add)
					throw new Exception ("When I said it was an IndexableType.Add, I meant it!");

				r = AddIndexableToIndex (indexable, primary_writer, ref secondary_writer, prop_change_docs);
				if (r != null)
					receipt_queue.Add (r);
			}

			if (text_cache != null)
				text_cache.CommitTransaction ();

			if (Shutdown.ShutdownRequested) {
				foreach (DeferredInfo di in deferred_indexables)
					di.Cleanup ();
				deferred_indexables.Clear ();

				foreach (Indexable indexable in request_indexables)
					indexable.Cleanup ();

				primary_writer.Close ();
				if (secondary_writer != null)
					secondary_writer.Close ();

				return null;
			}

			if (request.OptimizeIndex) {
				Stopwatch watch = new Stopwatch ();
				Logger.Log.Debug ("Optimizing {0}", IndexName);
				watch.Start ();
				primary_writer.Optimize ();
				if (secondary_writer == null)
					secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
				secondary_writer.Optimize ();
				watch.Stop ();
				Logger.Log.Debug ("{0} optimized in {1}", IndexName, watch);
			}

			// Step #4. Close our writers and return the events to
			// indicate what has happened.

			primary_writer.Close ();
			if (secondary_writer != null)
				secondary_writer.Close ();

			// Send a single IndexerIndexablesReceipt if there were deferred indexables
			if (deferred_indexables.Count > 0) {
				Log.Debug ("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
				IndexerIndexablesReceipt r = new IndexerIndexablesReceipt ();
				receipt_queue.Add (r);
			}

			IndexerReceipt [] receipt_array;
			receipt_array = new IndexerReceipt [receipt_queue.Count];
			for (int i = 0; i < receipt_queue.Count; ++i)
				receipt_array [i] = (IndexerReceipt) receipt_queue [i];

			return receipt_array;
		}