Code Example #1
File: TermScorer.cs Project: mindis/Transformalize
        // firstDocID is ignored since nextDoc() sets 'doc'
        public /*protected internal*/ override bool Score(Collector c, int end, int firstDocID)
        {
            c.SetScorer(this);
            while (doc < end)
            {
                // for docs in window
                c.Collect(doc);                 // collect score

                if (++pointer >= pointerMax)
                {
                    pointerMax = termDocs.Read(docs, freqs);                     // refill buffers
                    if (pointerMax != 0)
                    {
                        pointer = 0;
                    }
                    else
                    {
                        termDocs.Close();                         // close stream
                        doc = System.Int32.MaxValue;              // set to sentinel value
                        return(false);
                    }
                }
                doc = docs[pointer];
            }
            return(true);
        }
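A minimal Collector sketch that this Score(Collector, int, int) overload could drive. The Collector base class and its member signatures follow the Lucene.Net 2.9-era API (in some later versions AcceptsDocsOutOfOrder is a property rather than a method); the counting class itself is hypothetical.

        public class CountingCollector : Lucene.Net.Search.Collector
        {
            private Lucene.Net.Search.Scorer scorer;

            public int Count { get; private set; }

            public override void SetScorer(Lucene.Net.Search.Scorer scorer)
            {
                this.scorer = scorer;             // kept so Collect could also read the score
            }

            public override void Collect(int doc)
            {
                Count++;                          // called once per doc the scorer visits
            }

            public override void SetNextReader(Lucene.Net.Index.IndexReader reader, int docBase)
            {
                // no per-segment state is needed for a plain count
            }

            public override bool AcceptsDocsOutOfOrder()
            {
                return true;                      // ordering is irrelevant when only counting
            }
        }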
Code Example #2
File: TestIndexReader.cs Project: raj581/Marvin
        private void  AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
        {
            TermDocs tdocs = null;

            try
            {
                tdocs = reader.TermDocs(term);
                Assert.IsNotNull(tdocs, msg + ", null TermDocs");
                int count = 0;
                while (tdocs.Next())
                {
                    count++;
                }
                Assert.AreEqual(expected, count, msg + ", count mismatch");
            }
            finally
            {
                if (tdocs != null)
                {
                    try
                    {
                        tdocs.Close();
                    }
                    catch (System.Exception)
                    {
                        // ignore failures while closing during cleanup
                    }
                }
            }
        }
Code Example #3
 protected virtual void Dispose(bool disposing)
 {
     if (disposing)
     {
         in_Renamed.Close();
     }
 }
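For completeness, the public Dispose entry point that conventionally pairs with a protected Dispose(bool) like the one above; this is the standard C# dispose pattern, sketched here rather than taken from the original file.

 public void Dispose()
 {
     Dispose(true);
     GC.SuppressFinalize(this);   // a finalizer, if any, no longer needs to run
 }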
Code Example #4
 public virtual void  Close()
 {
     if (termDocs != null)
     {
         termDocs.Close();
     }
 }
Code Example #5
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String   field    = StringHelper.Intern(entryKey.field);
                System.String[] retArray = new System.String[reader.MaxDoc];
                TermDocs        termDocs = reader.TermDocs();
                TermEnum        termEnum = reader.Terms(new Term(field));

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        System.String termval = term.Text;
                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    }while (termEnum.Next());
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                return(retArray);
            }
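A hedged usage sketch for the value this CreateValue builds: FieldCache_Fields.DEFAULT.GetStrings hands back the same doc-indexed string array through the cache. The reader variable and the "category" field name are assumptions.

            // doc-indexed field values, served from the field cache
            System.String[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, "category");
            System.String firstDocValue = values[0];   // value of "category" for doc 0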
Code Example #6
File: IndexReader.cs Project: ferrod20/tprilucene
        /// <summary>Deletes all documents that have a given <code>term</code> indexed.
        /// This is useful if one uses a document field to hold a unique ID string for
        /// the document.  Then to delete such a document, one merely constructs a
        /// term with the appropriate field and the unique ID string as its text and
        /// passes it to this method.
        /// See {@link #DeleteDocument(int)} for information about when this deletion will
        /// become effective.
        ///
        /// </summary>
        /// <returns> the number of documents deleted </returns>
        /// <throws>  StaleReaderException if the index has changed since this reader was opened </throws>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  LockObtainFailedException if another writer has this index open
        /// (<code>write.lock</code> could not be obtained) </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public int DeleteDocuments(Term term)
        {
            EnsureOpen();
            TermDocs docs = TermDocs(term);

            if (docs == null)
            {
                return(0);
            }
            int n = 0;

            try
            {
                while (docs.Next())
                {
                    DeleteDocument(docs.Doc());
                    n++;
                }
            }
            finally
            {
                docs.Close();
            }
            return(n);
        }
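A hedged usage sketch of the delete-by-unique-ID pattern the doc comment above describes; the directory variable and the "id" field name are assumptions.

        IndexReader reader = IndexReader.Open(directory, false);   // writable (non-read-only) reader
        try
        {
            int deleted = reader.DeleteDocuments(new Term("id", "doc-42"));
            System.Console.WriteLine("deleted {0} document(s)", deleted);
        }
        finally
        {
            reader.Close();   // commits the pending deletions
        }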
Code Example #7
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            TermEnum enumerator = query.GetEnum(reader);

            try
            {
                // if current term in enum is null, the enum is empty -> shortcut
                if (enumerator.Term == null)
                {
                    return(DocIdSet.EMPTY_DOCIDSET);
                }
                // else fill into an OpenBitSet
                OpenBitSet bitSet   = new OpenBitSet(reader.MaxDoc);
                int[]      docs     = new int[32];
                int[]      freqs    = new int[32];
                TermDocs   termDocs = reader.TermDocs();
                try
                {
                    int termCount = 0;
                    do
                    {
                        Term term = enumerator.Term;
                        if (term == null)
                        {
                            break;
                        }
                        termCount++;
                        termDocs.Seek(term);
                        while (true)
                        {
                            int count = termDocs.Read(docs, freqs);
                            if (count != 0)
                            {
                                for (int i = 0; i < count; i++)
                                {
                                    bitSet.Set(docs[i]);
                                }
                            }
                            else
                            {
                                break;
                            }
                        }
                    } while (enumerator.Next());

                    query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right?
                }
                finally
                {
                    termDocs.Close();
                }

                return(bitSet);
            }
            finally
            {
                enumerator.Close();
            }
        }
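A hedged sketch of consuming the returned DocIdSet through its iterator; DocIdSetIterator and its NO_MORE_DOCS sentinel belong to the same Lucene.Net 2.9-era API, while the filter and reader variables are assumptions.

        DocIdSet set = filter.GetDocIdSet(reader);
        DocIdSetIterator it = set.Iterator();
        int doc;
        while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            System.Console.WriteLine("matched doc {0}", doc);   // one line per set bit
        }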
Code Example #8
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                Entry entry = entryKey;

                System.String field  = entry.field;
                FloatParser   parser = (FloatParser)entry.custom;

                if (parser == null)
                {
                    try
                    {
                        return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER));
                    }
                    catch (System.FormatException)
                    {
                        return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER));
                    }
                }
                float[]  retArray = null;
                TermDocs termDocs = reader.TermDocs();
                TermEnum termEnum = reader.Terms(new Term(field));

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        float termval = parser.ParseFloat(term.Text);
                        if (retArray == null)
                        {
                            // late init
                            retArray = new float[reader.MaxDoc];
                        }
                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    }while (termEnum.Next());
                }
                catch (StopFillCacheException)
                {
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                if (retArray == null)
                {
                    // no values
                    retArray = new float[reader.MaxDoc];
                }
                return(retArray);
            }
Code Example #9
        public virtual void  TestFilterIndexReader_Renamed()
        {
            RAMDirectory directory = new MockRAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document d1 = new Document();

            d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d1);

            Document d2 = new Document();

            d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d2);

            Document d3 = new Document();

            d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d3);

            writer.Close();

            IndexReader reader = new TestReader(IndexReader.Open(directory));

            Assert.IsTrue(reader.IsOptimized());

            TermEnum terms = reader.Terms();

            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
            }
            terms.Close();

            TermPositions positions = reader.TermPositions(new Term("default", "one"));

            while (positions.Next())
            {
                Assert.IsTrue((positions.Doc() % 2) == 1);
            }

            int NUM_DOCS = 3;

            TermDocs td = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
            directory.Close();
        }
Code Example #10
        public static int Count(Term t, IndexReader r)
        {
            int      count = 0;
            TermDocs td    = r.TermDocs(t, null);

            while (td.Next(null))
            {
                var d = td.Doc;
                count++;
            }
            td.Close();
            return(count);
        }
Code Example #11
        public static int Count(Term t, IndexReader r)
        {
            int      count = 0;
            TermDocs td    = r.TermDocs(t);

            while (td.Next())
            {
                td.Doc();
                count++;
            }
            td.Close();
            return(count);
        }
Code Example #12
        public virtual void TestMultiTermDocs()
        {
            SqlServerDirectory.ProvisionDatabase(Connection, "test1", true);
            SqlServerDirectory.ProvisionDatabase(Connection, "test2", true);
            SqlServerDirectory.ProvisionDatabase(Connection, "test3", true);

            var ramDir1 = new SqlServerDirectory(Connection, new Options()
            {
                SchemaName = "test1"
            });

            AddDoc(ramDir1, "test foo", true);
            var ramDir2 = new SqlServerDirectory(Connection, new Options()
            {
                SchemaName = "test2"
            });

            AddDoc(ramDir2, "test blah", true);
            var ramDir3 = new SqlServerDirectory(Connection, new Options()
            {
                SchemaName = "test3"
            });

            AddDoc(ramDir3, "test wow", true);

            IndexReader[] readers1 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir3, false) };
            IndexReader[] readers2 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir2, false), IndexReader.Open(ramDir3, false) };
            MultiReader   mr2      = new MultiReader(readers1);
            MultiReader   mr3      = new MultiReader(readers2);

            // test mixing up TermDocs and TermEnums from different readers.
            TermDocs td2 = mr2.TermDocs();
            TermEnum te3 = mr3.Terms(new Term("body", "wow"));

            td2.Seek(te3);
            int ret = 0;

            // This should blow up if we forget to check that the TermEnum is from the same
            // reader as the TermDocs.
            while (td2.Next())
            {
                ret += td2.Doc;
            }
            td2.Close();
            te3.Close();

            // really a dummy assert to ensure that we got some docs and to ensure that
            // nothing is optimized out.
            Assert.IsTrue(ret > 0);
        }
Code Example #13
        /// <summary>
        /// Deletes a number of documents that conform to the specified Term-s
        /// </summary>
        /// <param name="terms">Term-s to be deleted</param>
        /// <returns>A number of documents deleted</returns>
        public int OptimizedDeleteDocuments(Term[] terms)
        {
            int n = 0;

            lock (this)
            {
                if (directoryOwner)
                {
                    AquireWriteLock();
                }

                foreach (Term term in terms)
                {
                    TermDocs docs = TermDocs(term);

                    if (docs == null)
                    {
                        continue;
                    }

                    try
                    {
                        while (docs.Next())
                        {
                            DoDelete(docs.Doc());

                            hasChanges = true;

                            n++;
                        }
                    }
                    finally
                    {
                        docs.Close();
                    }
                }

                // Release the lock ASAP if there are no changes

                if (!hasChanges &&
                    writeLock != null)
                {
                    writeLock.Release();
                    writeLock = null;
                }
            }

            return(n);
        }
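A hedged usage sketch for OptimizedDeleteDocuments: batch-deleting by several unique-ID terms in one synchronized pass. The reader variable (an instance of the class defining this method) and the "id" field name are assumptions.

        Term[] staleIds = new Term[]
        {
            new Term("id", "1001"),
            new Term("id", "1002"),
            new Term("id", "1003"),
        };
        int removed = reader.OptimizedDeleteDocuments(staleIds);   // one write lock, many terms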
Code Example #14
        private void Remove(System.Type entity, object id, IDirectoryProvider provider)
        {
            /*
             * even with Lucene 2.1, use of indexWriter to delete is not an option:
             * we can only delete by term, and the index doesn't have a term that
             * uniquely identifies the entry. See the logic below.
             */
            log.DebugFormat("remove from Lucene index: {0}#{1}", entity, id);
            DocumentBuilder builder  = workspace.GetDocumentBuilder(entity);
            Term            term     = builder.GetTerm(id);
            IndexReader     reader   = workspace.GetIndexReader(provider, entity);
            TermDocs        termDocs = null;

            try
            {
                // TODO is there a faster way?
                // TODO include TermDocs into the workspace?
                termDocs = reader.TermDocs(term);
                string entityName = TypeHelper.LuceneTypeName(entity);
                while (termDocs.Next())
                {
                    int docIndex = termDocs.Doc;
                    if (entityName.Equals(reader.Document(docIndex).Get(DocumentBuilder.CLASS_FIELDNAME)))
                    {
                        // remove only the one of the right class
                        // loop all to remove all the matches (defensive code)
                        reader.DeleteDocument(docIndex);
                    }
                }
            }
            catch (Exception e)
            {
                throw new SearchException("Unable to remove from Lucene index: " + entity + "#" + id, e);
            }
            finally
            {
                if (termDocs != null)
                {
                    try
                    {
                        termDocs.Close();
                    }
                    catch (IOException e)
                    {
                        log.Warn("Unable to close termDocs properly", e);
                    }
                }
            }
        }
Code Example #15
        public virtual void  TestAllTermDocs()
        {
            IndexReader reader   = OpenReader();
            int         NUM_DOCS = 2;
            TermDocs    td       = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
        }
Code Example #16
            protected virtual void Dispose(bool disposing)
            {
                if (isDisposed)
                {
                    return;
                }

                if (disposing)
                {
                    if (termDocs != null)
                    {
                        termDocs.Close();
                    }
                }

                isDisposed = true;
            }
Code Example #17
        /// <summary>
        /// Gets the documents that contain the specified Term
        /// </summary>
        /// <param name="term">the term to look up</param>
        /// <returns>a list of per-document term statistics</returns>
        public IList <TermDoc> DocumentCount(Term term)
        {
            TermDocs       docs = open.Reader.TermDocs(term);
            List <TermDoc> list = new List <TermDoc>();

            while (docs.Next())
            {
                TermDoc doc2 = new TermDoc();
                doc2.Freq = docs.Freq();
                doc2.Doc  = docs.Doc();
                doc2.Term = term;
                doc2.Norm = GetNorm(open.Reader, term.Field(), doc2.Doc);
                TermDoc item = doc2;
                list.Add(item);
            }
            docs.Close();
            return(list);
        }
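A hedged usage sketch for DocumentCount; the index variable stands for whatever object exposes this method, and the field and term values are assumptions.

        IList<TermDoc> stats = index.DocumentCount(new Term("body", "lucene"));
        foreach (TermDoc d in stats)
        {
            System.Console.WriteLine("doc={0} freq={1} norm={2}", d.Doc, d.Freq, d.Norm);
        }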
Code Example #18
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                Entry entry = entryKey;

                System.String field  = entry.field;
                ShortParser   parser = (ShortParser)entry.custom;

                if (parser == null)
                {
                    return(wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER));
                }
                short[]  retArray = new short[reader.MaxDoc];
                TermDocs termDocs = reader.TermDocs();
                TermEnum termEnum = reader.Terms(new Term(field));

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        short termval = parser.ParseShort(term.Text);
                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    }while (termEnum.Next());
                }
                catch (StopFillCacheException)
                {
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                return(retArray);
            }
Code Example #19
        public virtual void  TestAllTermDocs()
        {
            Directory      dir1 = GetDir1();
            Directory      dir2 = GetDir2();
            ParallelReader pr   = new ParallelReader();

            pr.Add(IndexReader.Open(dir1));
            pr.Add(IndexReader.Open(dir2));
            int      NUM_DOCS = 2;
            TermDocs td       = pr.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            pr.Close();
            dir1.Close();
            dir2.Close();
        }
Code Example #20
File: TermsFilter.cs Project: mundher/lucene.net
        /// <summary>
        /// Get the DocIdSet.
        /// </summary>
        /// <param name="reader">Applicable reader.</param>
        /// <returns>The set.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSet result = new OpenBitSet(reader.MaxDoc);
            TermDocs   td     = reader.TermDocs();

            try
            {
                foreach (Term t in this.terms)
                {
                    td.Seek(t);
                    while (td.Next())
                    {
                        result.Set(td.Doc);
                    }
                }
            }
            finally
            {
                td.Close();
            }

            return(result);
        }
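A hedged sketch of driving this filter from a search. TermsFilter.AddTerm and the Search(Query, Filter, int) overload exist in the Lucene.Net contrib/core API of this era; the searcher variable and the field values are assumptions.

        TermsFilter filter = new TermsFilter();
        filter.AddTerm(new Term("status", "published"));
        filter.AddTerm(new Term("status", "archived"));
        TopDocs hits = searcher.Search(new MatchAllDocsQuery(), filter, 10);   // filter supplies the DocIdSet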
Code Example #21
        public virtual void  TestMultiTermDocs()
        {
            RAMDirectory ramDir1 = new RAMDirectory();

            AddDoc(ramDir1, "test foo", true);
            RAMDirectory ramDir2 = new RAMDirectory();

            AddDoc(ramDir2, "test blah", true);
            RAMDirectory ramDir3 = new RAMDirectory();

            AddDoc(ramDir3, "test wow", true);

            IndexReader[] readers1 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir3) };
            IndexReader[] readers2 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir2), IndexReader.Open(ramDir3) };
            MultiReader   mr2      = new MultiReader(readers1);
            MultiReader   mr3      = new MultiReader(readers2);

            // test mixing up TermDocs and TermEnums from different readers.
            TermDocs td2 = mr2.TermDocs();
            TermEnum te3 = mr3.Terms(new Term("body", "wow"));

            td2.Seek(te3);
            int ret = 0;

            // This should blow up if we forget to check that the TermEnum is from the same
            // reader as the TermDocs.
            while (td2.Next())
            {
                ret += td2.Doc();
            }
            td2.Close();
            te3.Close();

            // really a dummy assert to ensure that we got some docs and to ensure that
            // nothing is optimized out.
            Assert.IsTrue(ret > 0);
        }
Code Example #22
        ////////////////////////////////////////////////////////////////

        static private void ScoreHits(Dictionary <int, Hit> hits_by_id,
                                      IndexReader reader,
                                      ICollection term_list)
        {
            LNS.Similarity similarity;
            similarity = LNS.Similarity.GetDefault();

            TermDocs term_docs = reader.TermDocs();
            Hit      hit;

            foreach (Term term in term_list)
            {
                double idf;
                idf = similarity.Idf(reader.DocFreq(term), reader.MaxDoc());

                int hit_count;
                hit_count = hits_by_id.Count;

                term_docs.Seek(term);
                while (term_docs.Next() && hit_count > 0)
                {
                    int id;
                    id = term_docs.Doc();

                    if (hits_by_id.TryGetValue(id, out hit))
                    {
                        double tf;
                        tf         = similarity.Tf(term_docs.Freq());
                        hit.Score += tf * idf;
                        --hit_count;
                    }
                }
            }

            term_docs.Close();
        }
Code Example #23
        /// <summary>
        /// Deletes the specified reader.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="term">The term.</param>
        /// <returns></returns>
        public int Delete(IndexReader reader, Term term)
        {
            TermDocs docs = reader.TermDocs(term);

            if (docs == null)
            {
                return(0);
            }
            int num = 0;

            try
            {
                while (docs.Next())
                {
                    reader.DeleteDocument(docs.Doc());
                    num++;
                }
            }
            finally
            {
                docs.Close();
            }
            return(num);
        }
Code Example #24
File: LuceneBitArray.cs Project: universsky/beagrep
        public void FlushUris()
        {
            if (pending_uris == null)
            {
                return;
            }

            TermDocs term_docs = this.searcher.Reader.TermDocs();

            for (int i = 0; i < pending_uris.Count; i++)
            {
                Term term = new Term("Uri", (string)pending_uris [i]);
                term_docs.Seek(term);

                if (term_docs.Next())
                {
                    this.Set(term_docs.Doc(), true);
                }
            }

            term_docs.Close();

            pending_uris = null;
        }
Code Example #25
 public virtual void  Close()
 {
     termDocs.Close();
 }
Code Example #26
 public virtual void  Close()
 {
     in_Renamed.Close();
 }
Code Example #27
        // There are two ways we can determine the max_results
        // most recent items:
        //
        // One is to instantiate Lucene documents for each of
        // the document IDs in primary_matches.  This is a
        // fairly expensive operation.
        //
        // The other is to walk through the list of all
        // document IDs in descending time order.  This is
        // a less expensive operation, but adds up over time
        // on large data sets.
        //
        // We can walk about 2.5 docs for every Document we
        // instantiate.  So what we'll do, if we have more
        // matches than available hits, is walk (m * 1.25)
        // docs to see if we can fill out the top 100 hits.
        // If not, we'll fall back to creating documents
        // for all of them.

        private static ArrayList ScanRecentDocs(IndexReader primary_reader,
                                                IndexReader secondary_reader,
                                                BetterBitArray primary_matches,
                                                Dictionary <int, Hit> hits_by_id,
                                                int max_results,
                                                ref int total_number_of_matches,
                                                HitFilter hit_filter,
                                                string index_name)
        {
            Stopwatch a = new Stopwatch();

            a.Start();

            TermDocs  docs               = primary_reader.TermDocs();
            TermEnum  enumerator         = primary_reader.Terms(new Term("InvertedTimestamp", String.Empty));
            ArrayList results            = new ArrayList(max_results);
            int       docs_found         = 0;
            int       docs_walked        = 0;
            int       hit_filter_removed = 0;
            int       max_docs           = (int)(primary_matches.TrueCount * 1.25);

            Term     term;
            TermDocs secondary_term_docs = null;

            if (secondary_reader != null)
            {
                secondary_term_docs = secondary_reader.TermDocs();
            }

            do
            {
                term = enumerator.Term();

                if (term.Field() != "InvertedTimestamp")
                {
                    break;
                }

                docs.Seek(enumerator);

                while (docs.Next() &&
                       docs_found < max_results &&
                       docs_walked < max_docs)
                {
                    int doc_id = docs.Doc();

                    if (primary_matches.Get(doc_id))
                    {
                        Document doc = primary_reader.Document(doc_id);
                        Hit      hit = CreateHit(doc, secondary_reader, secondary_term_docs);

                        // If we have a HitFilter, apply it.
                        if (hit_filter != null && !hit_filter(hit))
                        {
                            if (Debug)
                            {
                                Log.Debug("Filtered out {0}", hit.Uri);
                            }
                            hit_filter_removed++;
                            continue;
                        }
                        hits_by_id [doc_id] = hit;
                        // Add the result, last modified first
                        results.Add(hit);
                        docs_found++;
                    }

                    docs_walked++;
                }
            } while (enumerator.Next() &&
                     docs_found < max_results &&
                     docs_walked < max_docs);

            docs.Close();
            if (secondary_term_docs != null)
            {
                secondary_term_docs.Close();
            }

            // If we've found all max_results docs in this subset, fantastic!
            // We've probably short-circuited a slow search.
            if (docs_found != max_results)
            {
                // Otherwise, bad luck: not all docs were found.
                // Start afresh - this time traversing all results.
                results = null;
            }
            else
            {
                // Adjust total_number_of_matches. We need to do this to avoid scenarios like the following:
                // max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned.
                // We want to avoid saying "Showing top 70 of 100". Note that since we are not passing
                // every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the
                // 1234 could actually be much less. But since max_hits was 100, that will not mislead the user.
                total_number_of_matches -= hit_filter_removed;
            }

            a.Stop();
            if (Debug)
            {
                Log.Debug(">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a);

                if (docs_found == max_results)
                {
                    Log.Debug(">>> {0}: Successfully short circuited timestamp ordering!", index_name);
                }
            }

            return(results);
        }
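A toy calculation of the walk budget described in the comment at the top of this example, assuming the stated ratio of roughly 2.5 doc-ID walks per Document instantiation; the match count is illustrative only.

        int    m        = 1000;                  // matched documents (example figure)
        int    max_docs = (int)(m * 1.25);       // doc IDs the scan is willing to walk
        double cost     = max_docs / 2.5;        // ~= m * 0.5 Document instantiations,
                                                 // i.e. about half the fallback's price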
Code Example #28
        private static ArrayList   FindRecentResults(IndexReader primary_reader,
                                                     IndexReader secondary_reader,
                                                     BetterBitArray primary_matches,
                                                     Dictionary <int, Hit> hits_by_id,
                                                     int max_results,
                                                     ref int total_number_of_matches,
                                                     HitFilter hit_filter,
                                                     string index_name)
        {
            Stopwatch b = new Stopwatch();

            b.Start();

            int      count = 0;
            Document doc;

            ArrayList all_docs  = null;
            TopScores top_docs  = null;
            TermDocs  term_docs = null;

            if (primary_matches.TrueCount > max_results)
            {
                top_docs = new TopScores(max_results);
            }
            else
            {
                all_docs = new ArrayList(primary_matches.TrueCount);
            }

            if (secondary_reader != null)
            {
                term_docs = secondary_reader.TermDocs();
            }

            for (int match_index = primary_matches.Count; ; match_index--)
            {
                // Walk across the matches backwards, since newer
                // documents are more likely to be at the end of
                // the index.
                match_index = primary_matches.GetPreviousTrueIndex(match_index);
                if (match_index < 0)
                {
                    break;
                }

                count++;

                doc = primary_reader.Document(match_index, fields_timestamp_uri);

                // Check the timestamp --- if we have already reached our
                // limit, we might be able to reject it immediately.
                string timestamp_str;
                long   timestamp_num = 0;

                timestamp_str = doc.Get("Timestamp");
                if (timestamp_str == null)
                {
                    Logger.Log.Warn("No timestamp on {0}!", GetUriFromDocument(doc));
                }
                else
                {
                    timestamp_num = Int64.Parse(doc.Get("Timestamp"));
                    if (top_docs != null && !top_docs.WillAccept(timestamp_num))
                    {
                        continue;
                    }
                }

                // Get the actual hit now
                // doc was created with only 2 fields, so first get the complete lucene document for primary document.
                // Also run our hit_filter now, if we have one. Since we insist on returning the
                // max_results most recent hits, any filtering out must happen now and not later.
                Hit hit = CreateHit(primary_reader.Document(match_index), secondary_reader, term_docs);
                if (hit_filter != null && !hit_filter(hit))
                {
                    if (Debug)
                    {
                        Log.Debug("Filtered out {0}", hit.Uri);
                    }
                    total_number_of_matches--;
                    continue;
                }

                hits_by_id [match_index] = hit;

                // Add the document to the appropriate data structure.
                // We use the timestamp_num as the score, so high
                // scores correspond to more-recent timestamps.
                if (all_docs != null)
                {
                    all_docs.Add(hit);
                }
                else
                {
                    top_docs.Add(timestamp_num, hit);
                }
            }

            if (term_docs != null)
            {
                term_docs.Close();
            }

            b.Stop();

            if (Debug)
            {
                Log.Debug(">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b);
            }

            if (all_docs != null)
            {
                // Sort results before sending
                all_docs.Sort();
                return(all_docs);
            }
            else
            {
                return(top_docs.TopScoringObjects);
            }
        }
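A minimal sketch of the backwards walk used above: GetPreviousTrueIndex returns the nearest set bit at or below the given index, or a negative value once none remain, so high (newer) doc IDs come first. BetterBitArray is Beagle's own bit-array type, and the matches variable is an assumption.

        for (int i = matches.Count; ; i--)
        {
            i = matches.GetPreviousTrueIndex(i);
            if (i < 0)
            {
                break;                 // no set bits remain below this point
            }
            // process the match at index i, newest-first
        }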
Code Example #29
        private IndexerReceipt [] Flush_Unlocked(IndexerRequest request)
        {
            ArrayList receipt_queue;

            receipt_queue = new ArrayList();

            IndexReader primary_reader, secondary_reader;

            primary_reader   = IndexReader.Open(PrimaryStore);
            secondary_reader = IndexReader.Open(SecondaryStore);

            // Step #1: Make our first pass over the list of
            // indexables that make up our request.  For each add
            // or property change in the request, get the Lucene
            // documents so we can move forward any persistent
            // properties (for adds) or all old properties (for
            // property changes).
            //
            // Then, for each add or remove in the request,
            // delete the associated documents from the index.
            // Note that we previously cached added documents so
            // that we can move persistent properties forward.

            // parent_child_old_props is double-nested hashtable (depth-2 tree)
            // indexed by the parent uri, it stores another hashtable indexed by the (parent+child documents)
            // FIXME: 2-level hashtable is a waste for any non-child document.
            // Replace this by a better data structure.
            Hashtable parent_child_old_props = UriFu.NewHashtable();
            TermDocs  term_docs    = secondary_reader.TermDocs();
            int       delete_count = 0;

            IEnumerable request_indexables = request.Indexables;

            foreach (Indexable indexable in request_indexables)
            {
                string uri_str = UriFu.UriToEscapedString(indexable.Uri);
                Term   term;

                // Store the necessary properties from old documents for re-addition
                if (indexable.Type == IndexableType.Add ||
                    indexable.Type == IndexableType.PropertyChange)
                {
                    term = new Term("Uri", uri_str);
                    term_docs.Seek(term);

                    Hashtable this_parent_child_props = null;

                    if (term_docs.Next())
                    {
                        this_parent_child_props = UriFu.NewHashtable();
                        this_parent_child_props [indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                        parent_child_old_props [indexable.Uri]  = this_parent_child_props;
                    }

                    term = new Term("ParentUri", uri_str);
                    term_docs.Seek(term);

                    while (term_docs.Next())
                    {
                        Document doc = secondary_reader.Document(term_docs.Doc());

                        string child_uri_str = doc.Get("Uri");
                        Uri    child_uri     = UriFu.EscapedStringToUri(child_uri_str);
                        // Any valid lucene document *should* have a Uri, so no need to check for null
                        // Store the child documents too, to save persistent-properties
                        // of child documents
                        this_parent_child_props [child_uri] = doc;
                    }
                }

                // Now remove (non-remove indexables will be re-added in next block)
                Logger.Log.Debug("-{0}", indexable.DisplayUri);

                int num_delete = 0;

                term = new Term("Uri", uri_str);
                // For property changes, only secondary index is modified
                secondary_reader.DeleteDocuments(term);

                // Now remove from everywhere else (if asked to remove or if asked to add, in which case
                // we first remove and then add)
                // So we also need to remove child documents
                if (indexable.Type != IndexableType.PropertyChange)
                {
                    num_delete = primary_reader.DeleteDocuments(term);

                    // When we delete an indexable, also delete any children.
                    // FIXME: Shouldn't we also delete any children of children, etc.?
                    term        = new Term("ParentUri", uri_str);
                    num_delete += primary_reader.DeleteDocuments(term);
                    secondary_reader.DeleteDocuments(term);
                }

                // If this is a strict removal (and not a deletion that
                // we are doing in anticipation of adding something back),
                // queue up a removed receipt.
                if (indexable.Type == IndexableType.Remove)
                {
                    IndexerRemovedReceipt r;
                    r            = new IndexerRemovedReceipt(indexable.Id);
                    r.NumRemoved = num_delete;
                    receipt_queue.Add(r);
                }

                delete_count += num_delete;
            }

            term_docs.Close();

            if (HaveItemCount)
            {
                AdjustItemCount(-delete_count);
            }
            else
            {
                SetItemCount(primary_reader);
            }

            // We are now done with the readers, so we close them.
            // And also free them. Somehow, not freeing them prevents them from
            // being GCed at all.
            primary_reader.Close();
            primary_reader = null;
            secondary_reader.Close();
            secondary_reader = null;

            // FIXME: If we crash at exactly this point, we are in
            // trouble.  Items will have been dropped from the index
            // without the proper replacements being added.  We can
            // hopefully fix this when we move to Lucene 2.1.

            // Step #2: Make another pass across our list of indexables
            // and write out any new documents.

            if (text_cache != null)
            {
                text_cache.BeginTransaction();
            }

            IndexWriter primary_writer, secondary_writer;

            // FIXME: Lock obtain time-out can happen here; if that happens,
            // an exception will be thrown and this method will break in the middle
            // leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
            // methods.
            primary_writer   = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
            secondary_writer = null;

            foreach (Indexable indexable in request_indexables)
            {
                // If shutdown has been started, break here
                // FIXME: Some more processing will continue, much of it
                // concerning receipts, but the daemon will ignore receipts
                // now anyway; what is the fastest way to stop from here?
                if (Shutdown.ShutdownRequested)
                {
                    Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
                    break;
                }

                // Receipts for removes were generated in the
                // previous block.  Now we just have to remove
                // items from the text cache.
                if (indexable.Type == IndexableType.Remove)
                {
                    if (text_cache != null)
                    {
                        text_cache.Delete(indexable.Uri);
                    }

                    continue;
                }

                IndexerAddedReceipt r;
                Hashtable           prop_change_docs = (Hashtable)parent_child_old_props [indexable.Uri];

                if (indexable.Type == IndexableType.PropertyChange)
                {
                    Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

                    r = new IndexerAddedReceipt(indexable.Id);
                    r.PropertyChangesOnly = true;
                    receipt_queue.Add(r);

                    Document doc;
                    if (prop_change_docs == null)
                    {
                        doc = null;
                    }
                    else
                    {
                        doc = (Document)prop_change_docs [indexable.Uri];
                    }

                    Document new_doc;
                    new_doc = RewriteDocument(doc, indexable);

                    // Write out the new document...
                    if (secondary_writer == null)
                    {
                        secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                    }
                    secondary_writer.AddDocument(new_doc);

                    // Get child property change indexables...
                    ArrayList prop_change_indexables;
                    prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
                    // and store them; no need to delete them first, since they were already removed from the index
                    if (prop_change_indexables == null)
                    {
                        continue;
                    }

                    foreach (Indexable prop_change_indexable in prop_change_indexables)
                    {
                        Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                        doc     = (Document)prop_change_docs [prop_change_indexable.Uri];
                        new_doc = RewriteDocument(doc, prop_change_indexable);
                        secondary_writer.AddDocument(new_doc);
                    }

                    continue;                     // ...and proceed to the next Indexable
                }

                // If we reach this point we know we are dealing with an IndexableType.Add

                if (indexable.Type != IndexableType.Add)
                {
                    throw new Exception("When I said it was an IndexableType.Add, I meant it!");
                }

                r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
                if (r != null)
                {
                    receipt_queue.Add(r);
                }
            }

            if (text_cache != null)
            {
                text_cache.CommitTransaction();
            }

            if (Shutdown.ShutdownRequested)
            {
                foreach (DeferredInfo di in deferred_indexables)
                {
                    di.Cleanup();
                }
                deferred_indexables.Clear();

                foreach (Indexable indexable in request_indexables)
                {
                    indexable.Cleanup();
                }

                primary_writer.Close();
                if (secondary_writer != null)
                {
                    secondary_writer.Close();
                }

                return(null);
            }

            if (request.OptimizeIndex)
            {
                Stopwatch watch = new Stopwatch();
                Logger.Log.Debug("Optimizing {0}", IndexName);
                watch.Start();
                primary_writer.Optimize();
                if (secondary_writer == null)
                {
                    secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
                }
                secondary_writer.Optimize();
                watch.Stop();
                Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
            }

            // Step #4. Close our writers and return the events to
            // indicate what has happened.

            primary_writer.Close();
            if (secondary_writer != null)
            {
                secondary_writer.Close();
            }

            // Send a single IndexerIndexablesReceipt if there were deferred indexables
            if (deferred_indexables.Count > 0)
            {
                Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
                IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
                receipt_queue.Add(r);
            }

            IndexerReceipt [] receipt_array;
            receipt_array = new IndexerReceipt [receipt_queue.Count];
            for (int i = 0; i < receipt_queue.Count; ++i)
            {
                receipt_array [i] = (IndexerReceipt)receipt_queue [i];
            }

            return(receipt_array);
        }
Code Example #30
        public virtual void  testSkipTo(int indexDivisor)
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.Optimize();
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);

            TermDocs tdocs = reader.TermDocs();

            // without optimization (assumption skipInterval == 16)

            // with next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(0, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(1, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(2, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // without next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(0, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(10, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(11, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(12, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // without next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(10, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(26, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(27, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(28, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            //without next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(26, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            tdocs.Close();
            reader.Close();
            dir.Close();
        }