Пример #1
0
        override public ISnippetReader GetSnippet(string[] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
        {
            if (text_cache == null)
            {
                return(null);
            }

            Uri uri;

            if (mount_dir == null || hit ["beagrep:RemovableUri"] == null)
            {
                uri = hit.Uri;
            }
            else
            {
                uri = UriFu.EscapedStringToUri(hit ["beagrep:RemovableUri"]);
            }

            // Look up the hit in our local text cache.
            // Need to handle self-cached removable:/// uris
            bool       self_cache = true;
            TextReader reader     = text_cache.GetReader(uri, ref self_cache);

            if (self_cache)
            {
                reader = new StreamReader(hit.Uri.LocalPath);
            }
            else if (reader == null)
            {
                return(null);
            }

            return(SnippetFu.GetSnippet(query_terms, reader, full_text, ctx_length, snp_length));
        }
Пример #2
0
        override protected Uri GetChildUri(FileSystemObject child)
        {
            string s = Path.Combine(UriFu.UriToEscapedString(this.Uri), child.Name);

            //Console.WriteLine ("Asked to combine {0} and {1} = {2}", UriFu.UriToEscapedString (this.Uri), child.Name, s);
            return(UriFu.EscapedStringToUri(s));
        }
        private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
        {
            ArrayList receipt_queue;

            receipt_queue = new ArrayList();

            IndexReader primary_reader, secondary_reader;

            primary_reader   = IndexReader.Open(PrimaryStore);
            secondary_reader = IndexReader.Open(SecondaryStore);

            // Step #1: Make our first pass over the list of
            // indexables that make up our request.  For each add
            // or property change in the request, get the Lucene
            // documents so we can move forward any persistent
            // properties (for adds) or all old properties (for
            // property changes).
            //
            // Then, for each add or remove in the request,
            // delete the associated documents from the index.
            // Note that we previously cached added documents so
            // that we can move persistent properties forward.

            // parent_child_old_props is double-nested hashtable (depth-2 tree)
            // indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
            // FIXME: 2-level hashtable is a waste for any non-child document.
            // Replace this by a better data structure.
            Hashtable parent_child_old_props = UriFu.NewHashtable();
            TermDocs  term_docs    = secondary_reader.TermDocs();
            int       delete_count = 0;

            IEnumerable request_indexables = request.Indexables;

            foreach (Indexable indexable in request_indexables)
            {
                string uri_str = UriFu.UriToEscapedString(indexable.Uri);
                Term   term;

                // Store the necessary properties from old documents for re-addition
                if (indexable.Type == IndexableType.Add ||
                    indexable.Type == IndexableType.PropertyChange)
                {
                    term = new Term("Uri", uri_str);
                    term_docs.Seek(term);

                    Hashtable this_parent_child_props = null;

                    if (term_docs.Next())
                    {
                        this_parent_child_props = UriFu.NewHashtable();
                        this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                        parent_child_old_props [indexable.Uri]  = this_parent_child_props;
                    }

                    term = new Term("ParentUri", uri_str);
                    term_docs.Seek(term);

                    while (term_docs.Next())
                    {
                        Document doc = secondary_reader.Document(term_docs.Doc());

                        string child_uri_str = doc.Get("Uri");
                        Uri    child_uri     = UriFu.EscapedStringToUri(child_uri_str);
                        // Any valid lucene document *should* have a Uri, so no need to check for null
                        // Store the child documents too, to save persistent-properties
                        // of child documents
                        this_parent_child_props [child_uri] = doc;
                    }
                }

                // Now remove (non-remove indexables will be re-added in next block)
                Logger.Log.Debug("-{0}", indexable.DisplayUri);

                int num_delete = 0;

                term = new Term("Uri", uri_str);
                // For property changes, only secondary index is modified
                secondary_reader.DeleteDocuments(term);

                // Now remove from everywhere else (if asked to remove or if asked to add, in which case
                // we first remove and then add)
                // So we also need to remove child documents
                if (indexable.Type != IndexableType.PropertyChange)
                {
                    num_delete = primary_reader.DeleteDocuments(term);

                    // When we delete an indexable, also delete any children.
                    // FIXME: Shouldn't we also delete any children of children, etc.?
                    term        = new Term("ParentUri", uri_str);
                    num_delete += primary_reader.DeleteDocuments(term);
                    secondary_reader.DeleteDocuments(term);
                }

                // If this is a strict removal (and not a deletion that
                // we are doing in anticipation of adding something back),
                // queue up a removed receipt.
                if (indexable.Type == IndexableType.Remove)
                {
                    IndexerRemovedReceipt r;
                    r            = new IndexerRemovedReceipt(indexable.Id);
                    r.NumRemoved = num_delete;
                    receipt_queue.Add(r);
                }

                delete_count += num_delete;
            }

            term_docs.Close();

            if (HaveItemCount)
            {
                AdjustItemCount(-delete_count);
            }
            else
            {
                SetItemCount(primary_reader);
            }

            // We are now done with the readers, so we close them.
            // And also free them. Somehow not freeing them is preventing them from
            // GCed at all.
            primary_reader.Close();
            primary_reader = null;
            secondary_reader.Close();
            secondary_reader = null;

            // FIXME: If we crash at exactly this point, we are in
            // trouble.  Items will have been dropped from the index
            // without the proper replacements being added.  We can
            // hopefully fix this when we move to Lucene 2.1.

            // Step #2: Make another pass across our list of indexables
            // and write out any new documents.

            if (text_cache != null)
            {
                text_cache.BeginTransaction();
            }

            IndexWriter primary_writer, secondary_writer;

            // FIXME: Lock obtain time-out can happen here; if that happens,
            // an exception will be thrown and this method will break in the middle
            // leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
            // methods.
            primary_writer   = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
            secondary_writer = null;

            foreach (Indexable indexable in request_indexables)
            {
                // If shutdown has been started, break here
                // FIXME: Some more processing will continue, a lot of them
                // concerning receipts, but the daemon will anyway ignore receipts
                // now, what is the fastest way to stop from here ?
                if (Shutdown.ShutdownRequested)
                {
                    Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
                    break;
                }

                // Receipts for removes were generated in the
                // previous block.  Now we just have to remove
                // items from the text cache.
                if (indexable.Type == IndexableType.Remove)
                {
                    if (text_cache != null)
                    {
                        text_cache.Delete(indexable.Uri);
                    }

                    continue;
                }

                IndexerAddedReceipt r;
                Hashtable           prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

                if (indexable.Type == IndexableType.PropertyChange)
                {
                    Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

                    r = new IndexerAddedReceipt(indexable.Id);
                    r.PropertyChangesOnly = true;
                    receipt_queue.Add(r);

                    Document doc;
                    if (prop_change_docs == null)
                    {
                        doc = null;
                    }
                    else
                    {
                        doc = (Document)prop_change_docs [indexable.Uri];
                    }

                    Document new_doc;
                    new_doc = RewriteDocument(doc, indexable);

                    // Write out the new document...
                    if (secondary_writer == null)
                    {
                        secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                    }
                    secondary_writer.AddDocument(new_doc);

                    // Get child property change indexables...
                    ArrayList prop_change_indexables;
                    prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
                    // and store them; no need to delete them first, since they were already removed from the index
                    if (prop_change_indexables == null)
                    {
                        continue;
                    }

                    foreach (Indexable prop_change_indexable in prop_change_indexables)
                    {
                        Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                        doc     = (Document)prop_change_docs [prop_change_indexable.Uri];
                        new_doc = RewriteDocument(doc, prop_change_indexable);
                        secondary_writer.AddDocument(new_doc);
                    }

                    continue;                     // ...and proceed to the next Indexable
                }

                // If we reach this point we know we are dealing with an IndexableType.Add

                if (indexable.Type != IndexableType.Add)
                {
                    throw new Exception("When I said it was an IndexableType.Add, I meant it!");
                }

                r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
                if (r != null)
                {
                    receipt_queue.Add(r);
                }
            }

            if (text_cache != null)
            {
                text_cache.CommitTransaction();
            }

            if (Shutdown.ShutdownRequested)
            {
                foreach (DeferredInfo di in deferred_indexables)
                {
                    di.Cleanup();
                }
                deferred_indexables.Clear();

                foreach (Indexable indexable in request_indexables)
                {
                    indexable.Cleanup();
                }

                primary_writer.Close();
                if (secondary_writer != null)
                {
                    secondary_writer.Close();
                }

                return(null);
            }

            if (request.OptimizeIndex)
            {
                Stopwatch watch = new Stopwatch();
                Logger.Log.Debug("Optimizing {0}", IndexName);
                watch.Start();
                primary_writer.Optimize();
                if (secondary_writer == null)
                {
                    secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                }
                secondary_writer.Optimize();
                watch.Stop();
                Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
            }

            // Step #4. Close our writers and return the events to
            // indicate what has happened.

            primary_writer.Close();
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }

            // Send a single IndexerIndexablesReceipt if there were deferred indexables
            if (deferred_indexables.Count > 0)
            {
                Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
                IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
                receipt_queue.Add(r);
            }

            IndexerReceipt [] receipt_array;
            receipt_array = new IndexerReceipt [receipt_queue.Count];
            for (int i = 0; i < receipt_queue.Count; ++i)
            {
                receipt_array [i] = (IndexerReceipt)receipt_queue [i];
            }

            return(receipt_array);
        }