Example #1
0
        /// <summary>
        /// Regression test: searching a nested SpanQuery combination must not
        /// throw a NullPointerException (the final Search call below is the
        /// query shape that used to fail).
        /// </summary>
        public virtual void  TestNPESpanQuery()
        {
            Directory   indexDir    = new MockRAMDirectory();
            IndexWriter indexWriter = new IndexWriter(indexDir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), IndexWriter.MaxFieldLength.LIMITED);

            // Index two sample documents.
            AddDoc(indexWriter, "1", "the big dogs went running to the market");
            AddDoc(indexWriter, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

            // Closing the writer commits the documents.
            indexWriter.Close();

            // Open a searcher over the committed index.
            IndexReader   indexReader   = IndexReader.Open(indexDir, true);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Sanity checks: confirm the documents were indexed as expected.
            Assert.AreEqual(2, HitCount(indexSearcher, "the"));
            Assert.AreEqual(1, HitCount(indexSearcher, "cat"));
            Assert.AreEqual(1, HitCount(indexSearcher, "dogs"));
            Assert.AreEqual(0, HitCount(indexSearcher, "rabbit"));

            // This throws exception (it shouldn't)
            Assert.AreEqual(1, indexSearcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
            indexReader.Close();
            indexDir.Close();
        }
        /// <summary>
        /// Stores a compressed binary field and a compressed string field in a
        /// document, reads the document back from a RAM index, and verifies
        /// that decompressing both fields reproduces the original value.
        /// </summary>
        public virtual void  TestCompressionTools()
        {
            Fieldable compressedBinaryField = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
            Fieldable compressedStringField = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

            Document document = new Document();
            document.Add(compressedBinaryField);
            document.Add(compressedStringField);

            // Add the document to a RAM-backed index.
            MockRAMDirectory directory   = new MockRAMDirectory();
            IndexWriter      indexWriter = new IndexWriter(directory, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            indexWriter.AddDocument(document);
            indexWriter.Close();

            // Open a reader and fetch the stored document back.
            IndexReader indexReader = IndexReader.Open(directory);
            Document    storedDoc   = indexReader.Document(0);
            Assert.IsTrue(storedDoc != null);

            // Decompress the binary field and compare its content with the original value.
            System.String decompressedBinary = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(storedDoc.GetBinaryValue("binaryCompressed"))));
            Assert.IsTrue(decompressedBinary.Equals(binaryValCompressed));
            Assert.IsTrue(CompressionTools.DecompressString(storedDoc.GetBinaryValue("stringCompressed")).Equals(binaryValCompressed));

            indexReader.Close();
            directory.Close();
        }
        /// <summary>
        /// Verifies that Csrq range queries honor Danish (da-DK) collation:
        /// "H\u00C5T" sorts inside [ "H\u00D8T", "MAND" ] under Danish collation
        /// even though it would not in plain Unicode code-point order.
        /// </summary>
        public virtual void  TestDanish()
        {
            /* build an index */
            RAMDirectory danishIndex = new RAMDirectory();
            IndexWriter  writer      = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

            // Danish collation orders the words below in the given order
            // (example taken from TestSort.testInternationalSort() ).
            System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
            for (int docnum = 0; docnum < words.Length; ++docnum)
            {
                Document doc = new Document();
                doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED));
                writer.AddDocument(doc);
            }
            writer.Optimize();
            writer.Close();

            IndexReader   reader = IndexReader.Open(danishIndex);
            IndexSearcher search = new IndexSearcher(reader);

            System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

            // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
            // but Danish collation does.
            ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
            AssertEquals("The index Term should be included.", 1, result.Length);

            result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
            AssertEquals("The index Term should not be included.", 0, result.Length);
            search.Close();
            // BUGFIX: the reader was previously leaked; close it so the
            // underlying directory resources are released.
            reader.Close();
        }
        /// <summary>
        /// Verifies that Csrq range queries honor Farsi ordering (via the
        /// Arabic collator): U+0633 must NOT fall inside [ U+062F, U+0698 ]
        /// under that collation, though it would in Unicode code-point order.
        /// </summary>
        public virtual void  TestFarsi()
        {
            /* build an index */
            RAMDirectory farsiIndex = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
            Document     doc        = new Document();

            doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();

            IndexReader   reader = IndexReader.Open(farsiIndex);
            IndexSearcher search = new IndexSearcher(reader);

            // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
            // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
            // characters properly.
            System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("ar").CompareInfo;

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a ConstantScoreRangeQuery
            // with a Farsi Collator (or an Arabic one for the case when Farsi is
            // not supported).
            ScoreDoc[] result = search.Search(Csrq("content", "\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
            AssertEquals("The index Term should not be included.", 0, result.Length);

            result = search.Search(Csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
            AssertEquals("The index Term should be included.", 1, result.Length);
            search.Close();
            // BUGFIX: the reader was previously leaked; close it so the
            // underlying directory resources are released.
            reader.Close();
        }
Example #5
0
        /// <summary> Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
        /// is a MultiReader containing the Reader of the original IndexSearcher,
        /// as well as several "empty" IndexReaders -- some of which will have
        /// deleted documents in them.  This new IndexSearcher should
        /// behave exactly the same as the original IndexSearcher.
        /// </summary>
        /// <param name="s">the searcher to wrap
        /// </param>
        /// <param name="edge">if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
        /// </param>
        /// <summary> Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
        /// is a MultiReader containing the Reader of the original IndexSearcher,
        /// as well as several "empty" IndexReaders -- some of which will have
        /// deleted documents in them.  This new IndexSearcher should
        /// behave exactly the same as the original IndexSearcher.
        /// </summary>
        /// <param name="s">the searcher to wrap
        /// </param>
        /// <param name="edge">if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
        /// </param>
        public static IndexSearcher WrapUnderlyingReader(IndexSearcher s, int edge)
        {
            IndexReader r = s.IndexReader;

            // we can't put deleted docs before the nested reader, because
            // it will throw off the docIds
            IndexReader[] readers = new IndexReader[]
            {
                // slot 0: the real reader goes first only when edge < 0
                edge < 0 ? r : IndexReader.Open(MakeEmptyIndex(0), true),
                IndexReader.Open(MakeEmptyIndex(0), true),
                // nested MultiReader that hosts the real reader in the middle (edge == 0)
                new MultiReader(new IndexReader[]
                {
                    // deleted docs (4) appear here only when r already came first (edge < 0),
                    // so they sit AFTER r and cannot shift its docIds
                    IndexReader.Open(MakeEmptyIndex(edge < 0 ? 4 : 0), true),
                    IndexReader.Open(MakeEmptyIndex(0), true),
                    0 == edge ? r : IndexReader.Open(MakeEmptyIndex(0), true)
                }),
                // more deleted docs (7, then 5 below) only when r is NOT last (edge <= 0)
                IndexReader.Open(MakeEmptyIndex(0 < edge ? 0 : 7), true),
                IndexReader.Open(MakeEmptyIndex(0), true),
                new MultiReader(new IndexReader[]
                {
                    IndexReader.Open(MakeEmptyIndex(0 < edge ? 0 : 5), true),
                    IndexReader.Open(MakeEmptyIndex(0), true),
                    // slot last: the real reader goes last only when edge > 0
                    0 < edge ? r : IndexReader.Open(MakeEmptyIndex(0), true)
                })
            };
            IndexSearcher out_Renamed = new IndexSearcher(new MultiReader(readers));

            // Preserve the original searcher's similarity so scoring is identical.
            out_Renamed.Similarity = s.Similarity;
            return(out_Renamed);
        }
Example #6
0
        /// <summary>
        /// Builds a RAM-resident index of NUM_DOCS documents whose numeric
        /// fields count down from each primitive type's maximum value, then
        /// opens the shared reader used by the tests in this fixture.
        /// </summary>
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // Start every counter at its type's maximum and decrement per document.
            long   longValue   = System.Int64.MaxValue;
            double doubleValue = System.Double.MaxValue;
            sbyte  byteValue   = (sbyte)System.SByte.MaxValue;
            short  shortValue  = System.Int16.MaxValue;
            int    intValue    = System.Int32.MaxValue;
            float  floatValue  = System.Single.MaxValue;

            for (int docIndex = 0; docIndex < NUM_DOCS; docIndex++)
            {
                Document doc = new Document();
                doc.Add(new Field("theLong", System.Convert.ToString(longValue--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theDouble", (doubleValue--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theByte", System.Convert.ToString((sbyte)byteValue--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theShort", System.Convert.ToString(shortValue--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theInt", System.Convert.ToString(intValue--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theFloat", (floatValue--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Close();
            reader = IndexReader.Open(directory);
        }
Example #7
0
        /// <summary>
        /// Copies an FSDirectory into a MockRAMDirectory, closes the source,
        /// and verifies that size bookkeeping is consistent and that every
        /// added document is readable and searchable from the RAM copy.
        /// </summary>
        public virtual void  TestRAMDirectory_Renamed()
        {
            Directory        fsDir  = FSDirectory.Open(indexDir);
            MockRAMDirectory ramDir = new MockRAMDirectory(fsDir);

            // The underlying FS directory is no longer needed once copied.
            fsDir.Close();

            // Reported size must match a full recomputation.
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            // Verify the document count through a reader.
            IndexReader reader = IndexReader.Open(ramDir, true);
            Assert.AreEqual(docsToAdd, reader.NumDocs());

            // Open a searcher to check that all documents are present.
            IndexSearcher searcher = new IndexSearcher(reader);
            for (int docId = 0; docId < docsToAdd; docId++)
            {
                Document doc = searcher.Doc(docId);
                Assert.IsTrue(doc.GetField("content") != null);
            }

            // cleanup
            reader.Close();
            searcher.Close();
        }
        /// <summary>
        /// Opens the shared directory and runs TestTermPositionVectors for
        /// every thread count from 1 to numThreads, failing the test on any
        /// IOException and always closing the reader afterwards.
        /// </summary>
        public virtual void  Test()
        {
            IndexReader reader = null;

            try
            {
                reader = IndexReader.Open(directory);
                for (int threadCount = 1; threadCount <= numThreads; threadCount++)
                {
                    TestTermPositionVectors(reader, threadCount);
                }
            }
            catch (System.IO.IOException ioe)
            {
                Assert.Fail(ioe.Message);
            }
            finally
            {
                // Close the reader we opened above, even on failure.
                if (reader != null)
                {
                    try
                    {
                        reader.Close();
                    }
                    catch (System.IO.IOException ioe)
                    {
                        // Best effort: closing failed, log and move on.
                        System.Console.Error.WriteLine(ioe.StackTrace);
                    }
                }
            }
        }
        /// <summary>
        /// Populates a fresh RAMDirectory with a fixed set of documents (some
        /// with a "data" field, some deliberately without) and opens the
        /// shared reader and searcher used by the tests in this fixture.
        /// </summary>
        public override void  SetUp()
        {
            base.SetUp();

            System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int docId = 0; docId < data.Length; docId++)
            {
                Document doc = new Document();
                // Every document carries an "id" and a constant "all" keyword field.
                doc.Add(new Field("id", System.Convert.ToString(docId), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Only the non-null entries contribute an analyzed "data" field.
                if (data[docId] != null)
                {
                    doc.Add(new Field("data", data[docId], Field.Store.YES, Field.Index.ANALYZED));
                }
                writer.AddDocument(doc);
            }

            writer.Optimize();
            writer.Close();

            r = IndexReader.Open(index);
            s = new IndexSearcher(r);
        }
        /// <summary>
        /// Constant-score range queries must give every matching document the
        /// same score, regardless of how many terms in the range it matches —
        /// checked under both the default and the boolean rewrite method.
        /// </summary>
        public virtual void  TestEqualScores()
        {
            // NOTE: uses index build in *this* setUp

            IndexReader   reader   = IndexReader.Open(small, true);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Some hits match more terms than others; scores must still agree.
            ScoreDoc[] hits = searcher.Search(Csrq("data", "1", "6", T, T), null, 1000).ScoreDocs;
            int hitCount = hits.Length;
            AssertEquals("wrong number of results", 6, hitCount);

            float expectedScore = hits[0].Score;
            for (int i = 1; i < hitCount; i++)
            {
                AssertEquals("score for " + i + " was not the same", expectedScore, hits[i].Score);
            }

            // Same invariant under the constant-score boolean rewrite.
            hits     = searcher.Search(Csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).ScoreDocs;
            hitCount = hits.Length;
            AssertEquals("wrong number of results", 6, hitCount);
            for (int i = 0; i < hitCount; i++)
            {
                AssertEquals("score for " + i + " was not the same", expectedScore, hits[i].Score);
            }
        }
Example #11
0
        /* Walk directory hierarchy in uid order, while keeping uid iterator from
         * existing index in sync.  Mismatches indicate one of: (a) old documents to
         * be deleted; (b) unchanged documents, to be left alone; or (c) new
         * documents, to be indexed.
         */

        /// <summary>
        /// Entry point for (re)indexing: when <paramref name="create"/> is false,
        /// incrementally updates the existing index (deleting stale documents
        /// found via the "uid" term iterator); otherwise indexes from scratch.
        /// </summary>
        /// <param name="file">root file or directory to index</param>
        /// <param name="index">path of the existing index, opened only when create is false</param>
        /// <param name="create">true to build a fresh index; false to update incrementally</param>
        private static void  IndexDocs(System.IO.FileInfo file, System.String index, bool create)
        {
            if (!create)
            {
                // incrementally update

                reader  = IndexReader.Open(index);                // open existing index
                uidIter = reader.Terms(new Term("uid", ""));      // init uid iterator

                // Walks files while advancing uidIter; sets the static
                // 'deleting' flag when stale documents were encountered.
                IndexDocs(file);

                if (deleting)
                {
                    // delete rest of stale docs
                    // NOTE(review): compares Field() by reference to the literal "uid" —
                    // presumably relies on field-name string interning; confirm.
                    while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid")
                    {
                        System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                        reader.Delete(uidIter.Term());
                        uidIter.Next();
                    }
                    deleting = false;
                }

                uidIter.Close();                // close uid iterator
                reader.Close();                 // close existing index
            }
            // no existing index: build everything from scratch
            else
            {
                IndexDocs(file);
            }
        }
        /// <summary>
        /// Adding a constant-score range clause to a boolean query must not
        /// change the ranking produced by the scoring TermRangeQuery alone.
        /// </summary>
        public virtual void  TestBooleanOrderUnAffected()
        {
            // NOTE: uses index build in *this* setUp

            IndexReader   reader = IndexReader.Open(small);
            IndexSearcher search = new IndexSearcher(reader);

            // first do a regular TermRangeQuery which uses term expansion so
            // docs with more terms in range get higher scores
            Query rq = new TermRangeQuery("data", "1", "4", T, T);

            ScoreDoc[] expected = search.Search(rq, null, 1000).scoreDocs;
            int        numHits  = expected.Length;

            // now do a boolean query which also contains a
            // ConstantScoreRangeQuery and make sure the order is the same
            BooleanQuery q = new BooleanQuery();

            q.Add(rq, BooleanClause.Occur.MUST);                           // T, F);
            q.Add(Csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST); // T, F);

            ScoreDoc[] actual = search.Search(q, null, 1000).scoreDocs;

            // BUGFIX: corrected the typo "numebr" in the assertion message.
            AssertEquals("wrong number of hits", numHits, actual.Length);
            for (int i = 0; i < numHits; i++)
            {
                AssertEquals("mismatch in docid for hit#" + i, expected[i].doc, actual[i].doc);
            }

            // BUGFIX: close the searcher and reader (previously leaked).
            search.Close();
            reader.Close();
        }
        /// <summary>
        /// Exercises FieldCacheRangeFilter.NewDoubleRange on the "id" field:
        /// half-range, complementary half-open ranges, the unbounded range,
        /// and the positive/negative-infinity corner cases.
        /// </summary>
        public virtual void  TestFieldCacheRangeFilterDoubles()
        {
            IndexReader   reader = IndexReader.Open(signedIndex.index);
            IndexSearcher search = new IndexSearcher(reader);

            int numDocs = reader.NumDocs();

            System.Double minIdO = (double)(minId + .5);
            System.Double medIdO = (double)((float)minIdO + ((double)(maxId - minId)) / 2.0);

            ScoreDoc[] result;
            Query      q = new TermQuery(new Term("body", "body"));

            // Inclusive range covering the lower half of the ids.
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs).scoreDocs;
            Assert.AreEqual(numDocs / 2, result.Length, "find all");
            int count = 0;

            // Two complementary half-open ranges must cover every document exactly once.
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs).scoreDocs;
            count += result.Length;
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs).scoreDocs;
            count += result.Length;
            // BUGFIX: corrected the typo "concenatted" in the assertion message.
            Assert.AreEqual(numDocs, count, "sum of two concatenated ranges");
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs).scoreDocs;
            Assert.AreEqual(numDocs, result.Length, "find all");

            // Ranges anchored at +/- infinity must be empty.
            System.Double tempAux = (double)System.Double.PositiveInfinity;
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", tempAux, null, F, F), numDocs).scoreDocs;
            Assert.AreEqual(0, result.Length, "infinity special case");
            System.Double tempAux2 = (double)System.Double.NegativeInfinity;
            result = search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, tempAux2, F, F), numDocs).scoreDocs;
            Assert.AreEqual(0, result.Length, "infinity special case");

            // BUGFIX: close the searcher and reader (previously leaked).
            search.Close();
            reader.Close();
        }
Example #14
0
        /// <summary>
        /// Builds a RAMDirectory whose index contains no live documents but
        /// exactly numDeletedDocs deleted ones, sanity-checking the deletion
        /// bookkeeping on both the writer and a freshly opened reader.
        /// </summary>
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED);

            // Add the requested number of empty documents, then delete them all.
            for (int docIndex = 0; docIndex < numDeletedDocs; docIndex++)
            {
                writer.AddDocument(new Document());
            }
            writer.Commit();
            writer.DeleteDocuments(new MatchAllDocsQuery());
            writer.Commit();

            if (numDeletedDocs > 0)
            {
                Assert.IsTrue(writer.HasDeletions(), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, writer.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, writer.NumDocs(), "writer has non-deleted docs");
            writer.Close();

            // Re-open with a reader to confirm the deleted-doc count persisted.
            IndexReader reader = IndexReader.Open(dir);
            Assert.AreEqual(numDeletedDocs, reader.NumDeletedDocs(), "reader has wrong number of deleted docs");
            reader.Close();
            return(dir);
        }
        /// <summary>
        /// Exercises FieldCacheRangeFilter.NewStringRange on the "rand" field:
        /// ranges bounded on both ends, unbounded ranges, and degenerate
        /// single-value / empty ranges at the extremes.
        /// </summary>
        public virtual void  TestFieldCacheRangeFilterRand()
        {
            IndexReader   reader   = IndexReader.Open(signedIndex.index);
            IndexSearcher searcher = new IndexSearcher(reader);

            System.String minPadded = Pad(signedIndex.minR);
            System.String maxPadded = Pad(signedIndex.maxR);

            int numDocs = reader.NumDocs();

            Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

            ScoreDoc[] hits;
            Query      query = new TermQuery(new Term("body", "body"));

            // Bounded on both ends, covering the extremes.
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, maxPadded, T, T), numDocs).scoreDocs;
            Assert.AreEqual(numDocs, hits.Length, "find all");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, maxPadded, T, F), numDocs).scoreDocs;
            Assert.AreEqual(numDocs - 1, hits.Length, "all but biggest");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, maxPadded, F, T), numDocs).scoreDocs;
            Assert.AreEqual(numDocs - 1, hits.Length, "all but smallest");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, maxPadded, F, F), numDocs).scoreDocs;
            Assert.AreEqual(numDocs - 2, hits.Length, "all but extremes");

            // Unbounded on one end.
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, null, T, F), numDocs).scoreDocs;
            Assert.AreEqual(numDocs, hits.Length, "smallest and up");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", null, maxPadded, F, T), numDocs).scoreDocs;
            Assert.AreEqual(numDocs, hits.Length, "biggest and down");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, null, F, F), numDocs).scoreDocs;
            Assert.AreEqual(numDocs - 1, hits.Length, "not smallest, but up");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", null, maxPadded, F, F), numDocs).scoreDocs;
            Assert.AreEqual(numDocs - 1, hits.Length, "not biggest, but down");

            // Degenerate single-value and empty ranges.
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, minPadded, F, F), numDocs).scoreDocs;
            Assert.AreEqual(0, hits.Length, "min,min,F,F");
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", maxPadded, maxPadded, F, F), numDocs).scoreDocs;
            Assert.AreEqual(0, hits.Length, "max,max,F,F");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", minPadded, minPadded, T, T), numDocs).scoreDocs;
            Assert.AreEqual(1, hits.Length, "min,min,T,T");
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", null, minPadded, F, T), numDocs).scoreDocs;
            Assert.AreEqual(1, hits.Length, "nul,min,F,T");

            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", maxPadded, maxPadded, T, T), numDocs).scoreDocs;
            Assert.AreEqual(1, hits.Length, "max,max,T,T");
            hits = searcher.Search(query, FieldCacheRangeFilter.NewStringRange("rand", maxPadded, null, T, F), numDocs).scoreDocs;
            Assert.AreEqual(1, hits.Length, "max,nul,T,T");
        }
Example #16
0
        /// <summary>
        /// Verifies that TermRangeFilter honors Farsi ordering (via the Arabic
        /// collator): U+0633 must NOT fall inside [ U+062F, U+0698 ] under that
        /// collation, though it would in Unicode code-point order.
        /// </summary>
        public virtual void  TestFarsi()
        {
            /* build an index */
            RAMDirectory farsiIndex = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
            Document     doc        = new Document();

            doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();

            IndexReader   reader = IndexReader.Open(farsiIndex, true);
            IndexSearcher search = new IndexSearcher(reader);
            Query         q      = new TermQuery(new Term("body", "body"));

            // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
            // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
            // characters properly.
            System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi is not supported).
            int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000).TotalHits;

            Assert.AreEqual(0, numHits, "The index Term should not be included.");

            numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000).TotalHits;
            Assert.AreEqual(1, numHits, "The index Term should be included.");
            search.Close();
            // BUGFIX: the reader was previously leaked; close it so the
            // underlying directory resources are released.
            reader.Close();
        }
        /// <summary>
        /// Verifies CachingWrapperFilter semantics: the wrapped filter is
        /// invoked on the first GetDocIdSet call for a reader and served from
        /// cache on subsequent calls with the same reader.
        /// </summary>
        public virtual void  TestCachingWorks()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            MockFilter           filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            cacher.GetDocIdSet(reader);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // make sure no exception if cache is holding the wrong bitset
            cacher.Bits(reader);
            cacher.GetDocIdSet(reader);

            // second time, nested filter should not be called
            filter.Clear();
            cacher.GetDocIdSet(reader);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Close();
            // BUGFIX: the directory was previously leaked; close it as well.
            dir.Close();
        }
Example #18
0
 /// <summary> Check whether the word exists in the index.</summary>
 /// <param name="word">the word to look up in the spell index
 /// </param>
 /// <throws>  IOException if the index cannot be opened or read </throws>
 /// <returns> true iff the word exists in the index
 /// </returns>
 public virtual bool Exist(System.String word)
 {
     // Lazily open the spell-index reader on first use; the instance is
     // cached in the field for subsequent calls.
     if (reader == null)
     {
         reader = IndexReader.Open(spellindex);
     }
     // A word exists when at least one document contains it in F_WORD.
     return(reader.DocFreq(new Term(F_WORD, word)) > 0);
 }
        /// <summary>
        /// Exercises MultiPhraseQuery (the PhrasePrefixQuery replacement):
        /// expands the prefix "pi" into its matching index terms and verifies
        /// hit counts for a phrase that can match ("blueberry pi*") and one
        /// that cannot ("strawberry pi*").
        /// </summary>
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document     doc1       = new Document();
            Document     doc2       = new Document();
            Document     doc3       = new Document();
            Document     doc4       = new Document();
            Document     doc5       = new Document();

            doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
            doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
            doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
            doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
            doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc1);
            writer.AddDocument(doc2);
            writer.AddDocument(doc3);
            writer.AddDocument(doc4);
            writer.AddDocument(doc5);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore);

            MultiPhraseQuery query1 = new MultiPhraseQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open(indexStore);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix + "*"));

            do
            {
                // BUGFIX: guard against a null term — the enum may be positioned
                // past the last term, and Term() then returns null.
                if (te.Term() != null && te.Term().Text().StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term());
                }
            }while (te.Next());
            // BUGFIX: close the term enum (previously leaked).
            te.Close();

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).scoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000).scoreDocs;
            Assert.AreEqual(0, result.Length);

            // BUGFIX: close the reader and searcher (previously leaked).
            ir.Close();
            searcher.Close();
        }
Example #20
0
        /// <summary>
        /// Builds the spell-checker index from two document fields and checks
        /// suggestion behavior for a range of misspellings, including
        /// suggestions restricted to a single field.
        /// </summary>
        public virtual void  TestBuild()
        {
            try
            {
                IndexReader indexReader = IndexReader.Open(userindex);

                spellChecker.ClearIndex();

                // Index words from field1, then field2; field2 adds exactly one word.
                Addwords(indexReader, "field1");
                int wordsAfterField1 = this.Numdoc();

                Addwords(indexReader, "field2");
                int wordsAfterField2 = this.Numdoc();

                Assert.AreEqual(wordsAfterField2, wordsAfterField1 + 1);

                // test small word
                System.String[] suggestions = spellChecker.SuggestSimilar("fvie", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "five");

                suggestions = spellChecker.SuggestSimilar("five", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "nine");                 // don't suggest a word for itself

                suggestions = spellChecker.SuggestSimilar("fiv", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "five");

                suggestions = spellChecker.SuggestSimilar("ive", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "five");

                suggestions = spellChecker.SuggestSimilar("fives", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "five");

                suggestions = spellChecker.SuggestSimilar("fie", 2);
                Assert.AreEqual(1, suggestions.Length);
                Assert.AreEqual(suggestions[0], "five");

                suggestions = spellChecker.SuggestSimilar("fi", 2);
                Assert.AreEqual(0, suggestions.Length);

                // test restraint to a field
                suggestions = spellChecker.SuggestSimilar("tousand", 10, indexReader, "field1", false);
                Assert.AreEqual(0, suggestions.Length);                 // there isn't the term thousand in the field field1

                suggestions = spellChecker.SuggestSimilar("tousand", 10, indexReader, "field2", false);
                Assert.AreEqual(1, suggestions.Length);                 // there is the term thousand in the field field2
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail();
            }
        }
Example #21
0
        public virtual void  TestBoost()
        {
            // NOTE: relies on the index built by *this* fixture's SetUp.

            IndexReader   reader   = IndexReader.Open(small);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Check correct application of query normalization; this requires
            // a collector that does not normalize scores.
            Query normQuery = Csrq("data", "1", "6", T, T);

            normQuery.SetBoost(100);
            searcher.Search(normQuery, null, new AnonymousClassCollector(this));

            // Ensure that boosting one SHOULD clause of a query above the
            // other controls which matching document ranks first.

            // Default rewrite: boost the doc-#0 clause down to .1 so the
            // doc-#1 clause wins.
            Query queryA = Csrq("data", "A", "A", T, T);             // matches document #0
            queryA.SetBoost(.1f);
            Query queryZ = Csrq("data", "Z", "Z", T, T);             // matches document #1
            BooleanQuery combined = new BooleanQuery(true);

            combined.Add(queryA, BooleanClause.Occur.SHOULD);
            combined.Add(queryZ, BooleanClause.Occur.SHOULD);

            ScoreDoc[] results = searcher.Search(combined, null, 1000).scoreDocs;
            Assert.AreEqual(1, results[0].doc);
            Assert.AreEqual(0, results[1].doc);
            Assert.IsTrue(results[0].score > results[1].score);

            // Same check under the constant-score boolean-query rewrite mode.
            queryA = Csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);             // matches document #0
            queryA.SetBoost(.1f);
            queryZ = Csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);             // matches document #1
            combined = new BooleanQuery(true);
            combined.Add(queryA, BooleanClause.Occur.SHOULD);
            combined.Add(queryZ, BooleanClause.Occur.SHOULD);

            results = searcher.Search(combined, null, 1000).scoreDocs;
            Assert.AreEqual(1, results[0].doc);
            Assert.AreEqual(0, results[1].doc);
            Assert.IsTrue(results[0].score > results[1].score);

            // Boosting the doc-#0 clause up to 10 flips the ranking back.
            queryA = Csrq("data", "A", "A", T, T);             // matches document #0
            queryA.SetBoost(10f);
            queryZ = Csrq("data", "Z", "Z", T, T);             // matches document #1
            combined = new BooleanQuery(true);
            combined.Add(queryA, BooleanClause.Occur.SHOULD);
            combined.Add(queryZ, BooleanClause.Occur.SHOULD);

            results = searcher.Search(combined, null, 1000).scoreDocs;
            Assert.AreEqual(0, results[0].doc);
            Assert.AreEqual(1, results[1].doc);
            Assert.IsTrue(results[0].score > results[1].score);
        }
Example #22
0
        private int Numdoc()
        {
            // Returns the number of documents currently in the spell-check
            // index, asserting that the index is not empty.
            IndexReader reader   = IndexReader.Open(spellindex);
            int         docCount = reader.NumDocs();

            Assert.IsTrue(docCount != 0);
            reader.Close();
            return docCount;
        }
Example #23
0
        private int Numdoc()
        {
            // Returns the number of documents currently in the spell-check
            // index (opened read-only), asserting that the index is not empty.
            IndexReader reader   = IndexReader.Open(spellindex, true);
            int         docCount = reader.NumDocs();

            Assert.IsTrue(docCount != 0);

            reader.Close();
            return docCount;
        }
Example #24
0
        public virtual void  TestMethod()
        {
            // Verifies that rewriting a PrefixQuery directly and rewriting a
            // BooleanQuery wrapping that same PrefixQuery yield BooleanQueries
            // with the same number of clauses.
            RAMDirectory directory = new RAMDirectory();

            System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" };

            Query rw1 = null;
            Query rw2 = null;

            try
            {
                IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
                for (int i = 0; i < categories.Length; i++)
                {
                    Document doc = new Document();
                    doc.Add(Field.Keyword("category", categories[i]));
                    writer.AddDocument(doc);
                }
                writer.Close();

                IndexReader reader = IndexReader.Open(directory);
                PrefixQuery query  = new PrefixQuery(new Term("category", "foo"));

                rw1 = query.Rewrite(reader);

                BooleanQuery bq = new BooleanQuery();
                bq.Add(query, true, false);

                rw2 = bq.Rewrite(reader);
            }
            catch (System.IO.IOException e)
            {
                Assert.Fail(e.Message);
            }

            BooleanQuery bq1 = null;

            if (rw1 is BooleanQuery)
            {
                bq1 = (BooleanQuery)rw1;
            }
            else
            {
                // BUGFIX: previously this branch fell through with bq1 == null,
                // so a failed rewrite surfaced as a NullReferenceException on
                // the final assert instead of a clean test failure (the rw2
                // branch below already handled this case).
                Assert.Fail("Rewrite");
            }

            BooleanQuery bq2 = null;

            if (rw2 is BooleanQuery)
            {
                bq2 = (BooleanQuery)rw2;
            }
            else
            {
                Assert.Fail("Rewrite");
            }

            Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch");
        }
Example #25
0
        public override void  SetUp()
        {
            base.SetUp();

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(sim);

            // hed is the most important field, dek is secondary.
            // The four documents form a quality ladder for the query
            // "albino elephant": d1 is an "ok" match, d2 "good", d3 "better",
            // and d4 the "best" match.
            System.String[]   ids  = new System.String[] { "d1", "d2", "d3", "d4" };
            System.String[][] heds = new System.String[][]
            {
                new System.String[] { "elephant" },
                new System.String[] { "elephant" },
                new System.String[] { "albino", "elephant" },
                new System.String[] { "albino", "elephant" }
            };
            System.String[][] deks = new System.String[][]
            {
                new System.String[] { "elephant" },
                new System.String[] { "albino", "elephant" },
                new System.String[] { },
                new System.String[] { "albino" }
            };

            for (int i = 0; i < ids.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                foreach (System.String hed in heds[i])
                {
                    doc.Add(new Field("hed", hed, Field.Store.YES, Field.Index.ANALYZED));
                }
                foreach (System.String dek in deks[i])
                {
                    doc.Add(new Field("dek", dek, Field.Store.YES, Field.Index.ANALYZED));
                }
                writer.AddDocument(doc);
            }

            writer.Close();

            r            = IndexReader.Open(index, true);
            s            = new IndexSearcher(r);
            s.Similarity = sim;
        }
Example #26
0
        public virtual void  TestSetBufferSize()
        {
            // Verifies that an index stays fully consistent (doc freqs,
            // deletions, searches) while the directory's input buffer sizes
            // are being tweaked between operations.
            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
            MockFSDirectory    dir      = new MockFSDirectory(indexDir, NewRandom());

            try
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                writer.SetUseCompoundFile(false);
                for (int i = 0; i < 37; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                dir.allIndexInputs.Clear();

                IndexReader reader = IndexReader.Open(dir);
                Term        aaa    = new Term("content", "aaa");
                Term        bbb    = new Term("content", "bbb");
                Term        ccc    = new Term("content", "ccc");
                Assert.AreEqual(37, reader.DocFreq(ccc));
                reader.DeleteDocument(0);
                // Deletions do not change DocFreq; it still reports 37.
                Assert.AreEqual(37, reader.DocFreq(aaa));
                dir.tweakBufferSizes();
                reader.DeleteDocument(4);
                // BUGFIX: arguments were reversed (actual, expected); NUnit
                // expects (expected, actual) like every other assert here.
                Assert.AreEqual(37, reader.DocFreq(bbb));
                dir.tweakBufferSizes();

                // Searches, however, do respect the two deletions: 35 hits.
                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[]    hits     = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.tweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                searcher.Close();
                reader.Close();
            }
            finally
            {
                _TestUtil.RmDir(indexDir);
            }
        }
Example #27
0
        public void TestConcurrentAccess()
        {
            // Exercises the spell checker under concurrent use: worker threads
            // query it while the main thread repeatedly resets the spell index,
            // then verifies no worker failed and the internal searcher count
            // matches the expected bookkeeping.
            Assert.AreEqual(1, searchers.Count);
            IndexReader r = IndexReader.Open(userindex, true);

            spellChecker.ClearIndex();
            Assert.AreEqual(2, searchers.Count);
            Addwords(r, "field1");
            Assert.AreEqual(3, searchers.Count);
            int num_field1 = this.Numdoc();

            Addwords(r, "field2");
            Assert.AreEqual(4, searchers.Count);
            int num_field2 = this.Numdoc();

            Assert.AreEqual(num_field2, num_field1 + 1);
            int numThreads = 5 + this.random.Next(5);

            SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r, this);
                spellCheckWorker.start();
                workers[i] = spellCheckWorker;
            }
            int iterations = 5 + random.Next(5);

            for (int i = 0; i < iterations; i++)
            {
                Thread.Sleep(100);
                // concurrently reset the spell index
                spellChecker.SetSpellIndex(this.spellindex);
                // for debug - prints the internal Open searchers
                // showSearchersOpen();
            }

            spellChecker.Close();
            joinAll(workers, 5000);

            // No worker may have failed, and all must have terminated after
            // the spell checker was closed.
            for (int i = 0; i < workers.Length; i++)
            {
                Assert.False(workers[i].failed);
                Assert.True(workers[i].terminated);
            }
            // 4 searchers more than iterations:
            // 1. at creation
            // 2. ClearIndex()
            // 3. and 4. during the two Addwords calls above
            Assert.AreEqual(iterations + 4, searchers.Count);
            AssertSearchersClosed();
        }
Example #28
0
        public virtual void  TestLazyLoadThreadSafety()
        {
            // Exercises lazy field loading across many freshly opened readers;
            // field sizes exceed the index input buffer to force re-reads.
            r    = NewRandom();
            dir1 = new RAMDirectory();
            // test w/ field sizes bigger than the buffer of an index input
            BuildDir(dir1, 15, 5, 2000);

            // do many small tests so the thread locals go away inbetween
            for (int i = 0; i < 100; i++)
            {
                // NOTE(review): ir1 is reassigned each iteration without an
                // explicit Close here — presumably DoTest closes it; verify,
                // otherwise up to 100 readers stay open.
                ir1 = IndexReader.Open(dir1);
                DoTest(10, 100);
            }
        }
        // Test using a sparse index (with deleted docs). The DocIdSet should
        // not be cacheable when the range contains 0, because that case falls
        // back to TermDocs (which must see deletions).
        public void TestSparseIndex()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

            // Index ids -20..20, then delete id 0 to make the index sparse.
            for (int d = -20; d <= 20; d++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
            }

            writer.Optimize();
            writer.DeleteDocuments(new Term("id", "0"));
            writer.Close();

            IndexReader   reader = IndexReader.Open(dir, true);
            IndexSearcher Search = new IndexSearcher(reader);

            Assert.True(reader.HasDeletions);

            ScoreDoc[] result;
            Query      q = new TermQuery(new Term("body", "body"));

            FieldCacheRangeFilter <sbyte?> fcrf;

            // Ranges that include 0 must NOT be cacheable.
            result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
            Assert.AreEqual(40, result.Length, "find all");

            result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
            // BUGFIX: message said "find all"; this range only covers 0..20
            // (20 docs, since id 0 is deleted).
            Assert.AreEqual(20, result.Length, "find docs 0..20");

            result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be not cacheable");
            Assert.AreEqual(20, result.Length, "find docs -20..0");

            // Ranges that exclude 0 ARE cacheable.
            result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100).ScoreDocs;
            // BUGFIX: asserts True but the message claimed "must be not
            // cacheable"; ranges not containing 0 are cacheable.
            Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
            Assert.AreEqual(11, result.Length, "find docs 10..20");

            result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100).ScoreDocs;
            Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0]).IsCacheable, "DocIdSet must be cacheable");
            Assert.AreEqual(11, result.Length, "find docs -20..-10");

            // Release resources (previously leaked by this test).
            Search.Close();
            reader.Close();
            dir.Close();
        }
        public virtual void  TestBinaryFieldInIndex()
        {
            // Round-trips a binary stored field and a plain stored string
            // field through a RAM index and verifies both come back intact.
            IFieldable binaryField = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
            IFieldable stringField = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

            // A binary field constructed with Store.NO must be rejected.
            Assert.Throws <ArgumentException>(
                () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

            Document document = new Document();

            document.Add(binaryField);

            document.Add(stringField);

            // Both fields should be registered on the document.
            Assert.AreEqual(2, document.fields_ForNUnit.Count);

            // Index the document into a RAM directory.
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(document);
            writer.Close();

            // Re-open the index (writable, so we can delete later) and fetch
            // the stored document back.
            IndexReader reader        = IndexReader.Open(dir, false);
            Document    docFromReader = reader.Document(0);

            Assert.IsTrue(docFromReader != null);

            // The binary stored field must decode back to the original value.
            System.String roundTrippedBinary = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored")));
            Assert.IsTrue(roundTrippedBinary.Equals(binaryValStored));

            // The string stored field must match the original value as well.
            System.String roundTrippedString = docFromReader.Get("stringStored");
            Assert.IsTrue(roundTrippedString.Equals(binaryValStored));

            // Deleting the only document leaves the index empty.
            reader.DeleteDocument(0);
            Assert.AreEqual(0, reader.NumDocs());

            reader.Close();
            dir.Close();
        }