public virtual void TestAddBinaryTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect!
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
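The try/catch/Assert.Fail pattern above is the old Java idiom; a minimal equivalent, assuming NUnit's Assert.Throws helper is available to the test project:

            // Sketch: the writer must reject a document carrying two values for
            // the same DocValues field, so the exception can be asserted directly.
            Assert.Throws<System.ArgumentException>(() => iwriter.AddDocument(doc));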
        public virtual void TestStartPositions()
        {
            Directory dir = NewDirectory();

            // mimic StopAnalyzer
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer);
            Document doc = new Document();
            doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
            writer.AddDocument(doc);
            Document doc2 = new Document();
            doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
            writer.AddDocument(doc2);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // user queries on "starts-with quick"
            SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            // user queries on "starts-with the quick"
            SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
            sfq = new SpanNotQuery(include, sfq);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
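Why each assertion above yields exactly one hit: the stop filter removes "the" but keeps its position increment, so "quick" sits at position 1 in the first document and position 0 in the second. A comment sketch of the bookkeeping (positions assume MockAnalyzer's default increment behavior):

            // doc1 "the quick brown fox" -> quick@1 brown@2 fox@3   ("the" leaves a gap)
            // doc2 "quick brown fox"     -> quick@0 brown@1 fox@2
            // SpanFirstQuery(q, end) matches spans ending at or before `end`:
            //   end=1 -> only doc2's quick@0 qualifies  (first assertion: 1 hit)
            //   end=2 -> both docs qualify; SpanNot then subtracts the end=1
            //            matches, isolating doc1        (second assertion: 1 hit)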
 // TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
 public virtual void BuildIndex(Directory dir)
 {
     Random random = Random();
     MockAnalyzer analyzer = new MockAnalyzer(Random());
     analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
     IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     Similarity provider = new MySimProvider(this);
     config.SetSimilarity(provider);
     RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
     LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
     int num = AtLeast(100);
     for (int i = 0; i < num; i++)
     {
         Document doc = docs.NextDoc();
         int boost = Random().Next(255);
         Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
         f.Boost = boost;
         doc.Add(f);
         writer.AddDocument(doc);
         doc.RemoveField(ByteTestField);
         if (Rarely())
         {
             writer.Commit();
         }
     }
     writer.Commit();
     writer.Dispose();
     docs.Dispose();
 }
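MySimProvider is defined elsewhere in the suite. A hedged sketch of the one override that matters for this test, assuming the port's Similarity exposes ComputeNorm(FieldInvertState); the full class must also implement Similarity's scoring members:

     // Hypothetical core of MySimProvider's per-field similarity (sketch only):
     // store the document's 0..254 boost directly as the norm so it can be
     // read back verbatim when the index is verified.
     public override long ComputeNorm(FieldInvertState state)
     {
         return (long)state.Boost;
     }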
        public virtual void TestMixedMerge()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2)));
            Document d = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
            d.Add(f1);

            // this field will NOT have norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // now add another document that has norms for field f2 but not for f1,
            // and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reverse: now f1 omits norms and f2 has them
            d.Add(NewField("f1", "this field has norms", customType));

            d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
        public virtual void Test()
        {
            Directory d = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            MyIndexWriter w = new MyIndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            // Try to make an index that requires merging:
            w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 2, 11));
            int numStartDocs = AtLeast(20);
            LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            for (int docIDX = 0; docIDX < numStartDocs; docIDX++)
            {
                w.AddDocument(docs.NextDoc());
            }
            MergePolicy mp = w.Config.MergePolicy;
            int mergeAtOnce = 1 + w.GetSegmentInfosSize_Nunit();
            if (mp is TieredMergePolicy)
            {
                ((TieredMergePolicy)mp).MaxMergeAtOnce = mergeAtOnce;
            }
            else if (mp is LogMergePolicy)
            {
                ((LogMergePolicy)mp).MergeFactor = mergeAtOnce;
            }
            else
            {
                // skip test
                w.Dispose();
                d.Dispose();
                return;
            }

            AtomicBoolean doStop = new AtomicBoolean();
            w.Config.SetMaxBufferedDocs(2);
            ThreadClass t = new ThreadAnonymousInnerClassHelper(this, w, numStartDocs, docs, doStop);
            t.Start();
            w.ForceMerge(1);
            doStop.Set(true);
            t.Join();
            Assert.IsTrue(w.MergeCount.Get() <= 1, "merge count is " + w.MergeCount.Get());
            w.Dispose();
            d.Dispose();
            docs.Dispose();
        }
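ThreadAnonymousInnerClassHelper is declared elsewhere; a hedged sketch of what such a thread plausibly does here, so that ForceMerge(1) races against concurrent flushes (class and field names are assumptions):

        internal sealed class AddDocsThread : ThreadClass // hypothetical stand-in
        {
            private readonly IndexWriter writer;
            private readonly LineFileDocs docs;
            private readonly AtomicBoolean doStop;

            public AddDocsThread(IndexWriter writer, LineFileDocs docs, AtomicBoolean doStop)
            {
                this.writer = writer;
                this.docs = docs;
                this.doStop = doStop;
            }

            public override void Run()
            {
                // Keep feeding documents until the main thread flips doStop,
                // so new segments keep arriving while the forced merge runs.
                while (!doStop.Get())
                {
                    writer.AddDocument(docs.NextDoc());
                }
            }
        }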
        public virtual void TestFloatNorms()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            LineFileDocs docs = new LineFileDocs(Random());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                float nextFloat = (float)Random().NextDouble();
                // Cast to a double so that more precision is written to the stored string.
                Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(FloatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(FloatTestField);
            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                float expected = Convert.ToSingle(document.Get(FloatTestField));
                Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
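The verification loop depends on the norm being the raw bit pattern of the float boost; a self-contained sketch of that lossless round trip using only the BCL (Number.IntBitsToFloat is this port's equivalent of the decode step):

            float boost = 0.37f;
            int bits = BitConverter.ToInt32(BitConverter.GetBytes(boost), 0);      // float -> raw bits
            float decoded = BitConverter.ToSingle(BitConverter.GetBytes(bits), 0); // raw bits -> float
            System.Diagnostics.Debug.Assert(decoded == boost); // exact, hence the 0.0f delta above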
        public virtual void TestDemo()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            // Store the index in memory:
            Directory directory = NewDirectory();
            // To store an index on disk, use this instead:
            // Directory directory = FSDirectory.Open(new DirectoryInfo("/tmp/testindex"));
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, analyzer);
            Document doc = new Document();
            string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string text = "this is the text to be indexed. " + longTerm;

            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            IndexSearcher isearcher = NewSearcher(ireader);

            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);

            Assert.AreEqual(1, hits.TotalHits);
            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
            }

            // Test simple phrase query
            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term("fieldname", "to"));
            phraseQuery.Add(new Term("fieldname", "be"));
            Assert.AreEqual(1, isearcher.Search(phraseQuery, null, 1).TotalHits);

            ireader.Dispose();
            directory.Dispose();
        }
        public virtual void TestMmapIndex()
        {
            // sometimes the directory is not cleaned by RmDir because, on Windows,
            // it may take some time until the files are finally dereferenced. So clean
            // the directory up front; otherwise the new IndexWriter may fail.
            var dirPath = CreateTempDir("testLuceneMmap");
            RmDir(dirPath.FullName);
            var dir = new MMapDirectory(dirPath, null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            using (var analyzer = new MockAnalyzer(Random()))
            {
                // TODO: something about lock timeouts and leftover locks.
                using (var writer = new IndexWriter(dir,
                    new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(
                        IndexWriterConfig.OpenMode_e.CREATE)))
                {
                    writer.Commit();
                    using (IndexReader reader = DirectoryReader.Open(dir))
                    {
                        var searcher = NewSearcher(reader);
                        var num = AtLeast(1000);
                        for (int dx = 0; dx < num; dx++)
                        {
                            var f = RandomField();
                            var doc = new Document();
                            doc.Add(NewTextField("data", f, Field.Store.YES));
                            writer.AddDocument(doc);
                        }
                    }
                }

                RmDir(dirPath.FullName);
            }
        }
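RmDir is not part of this excerpt; a plausible helper, under the assumption that it simply deletes the tree when present (hypothetical implementation):

        private static void RmDir(string path)
        {
            // Best-effort recursive delete; on Windows, mapped files may still be
            // referenced for a while, which is why the test also cleans up front.
            if (System.IO.Directory.Exists(path))
            {
                System.IO.Directory.Delete(path, true);
            }
        }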
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
            LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            int charsToIndex = AtLeast(100000);
            int charsIndexed = 0;
            //System.out.println("bytesToIndex=" + charsToIndex);
            while (charsIndexed < charsToIndex)
            {
                Document doc = docs.NextDoc();
                charsIndexed += doc.Get("body").Length;
                w.AddDocument(doc);
                //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
            }
            IndexReader r = w.Reader;
            //System.out.println("numDocs=" + r.NumDocs);
            w.Dispose();

            IndexSearcher s = NewSearcher(r);
            Terms terms = MultiFields.GetFields(r).Terms("body");
            int termCount = 0;
            TermsEnum termsEnum = terms.Iterator(null);
            while (termsEnum.Next() != null)
            {
                termCount++;
            }
            Assert.IsTrue(termCount > 0);

            // Target ~10 terms to search:
            double chance = 10.0 / termCount;
            termsEnum = terms.Iterator(termsEnum);
            IDictionary<BytesRef, TopDocs> answers = new Dictionary<BytesRef, TopDocs>();
            while (termsEnum.Next() != null)
            {
                if (Random().NextDouble() <= chance)
                {
                    BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term());
                    answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
                }
            }

            if (answers.Count > 0)
            {
                CountdownEvent startingGun = new CountdownEvent(1);
                int numThreads = TestUtil.NextInt(Random(), 2, 5);
                ThreadClass[] threads = new ThreadClass[numThreads];
                for (int threadID = 0; threadID < numThreads; threadID++)
                {
                    ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun);
                    threads[threadID] = thread;
                    thread.Start();
                }
                startingGun.Signal();
                foreach (ThreadClass thread in threads)
                {
                    thread.Join();
                }
            }
            r.Dispose();
            dir.Dispose();
        }
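The searcher threads (ThreadAnonymousInnerClassHelper) are defined elsewhere; a hedged sketch of a plausible body: wait on the starting gun, then re-run each recorded query and check that hit counts stay stable while the searcher is shared across threads (names are assumptions):

        internal sealed class CheckAnswersThread : ThreadClass // hypothetical stand-in
        {
            private readonly IndexSearcher searcher;
            private readonly IDictionary<BytesRef, TopDocs> answers;
            private readonly CountdownEvent startingGun;

            public CheckAnswersThread(IndexSearcher searcher, IDictionary<BytesRef, TopDocs> answers, CountdownEvent startingGun)
            {
                this.searcher = searcher;
                this.answers = answers;
                this.startingGun = startingGun;
            }

            public override void Run()
            {
                startingGun.Wait(); // all threads begin searching at once
                foreach (KeyValuePair<BytesRef, TopDocs> answer in answers)
                {
                    TopDocs fresh = searcher.Search(new TermQuery(new Term("body", answer.Key)), 100);
                    Assert.AreEqual(answer.Value.TotalHits, fresh.TotalHits);
                }
            }
        }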
        public virtual void TestOmitTermFreqAndPositions()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document d = new Document();

            // this field will have Tf
            Field f1 = NewField("f1", "this field has term freqs", NormalType);
            d.Add(f1);

            // this field will NOT have Tf
            Field f2 = NewField("f2", "this field has NO Tf in all docs", OmitType);
            d.Add(f2);

            writer.AddDocument(d);
            writer.ForceMerge(1);
            // now add another document that has term freqs for field f2 but not for f1,
            // and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reverse
            f1 = NewField("f1", "this field has term freqs", OmitType);
            d.Add(f1);

            f2 = NewField("f2", "this field has NO Tf in all docs", NormalType);
            d.Add(f2);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").FieldIndexOptions, "OmitTermFreqAndPositions field bit should be set.");
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").FieldIndexOptions, "OmitTermFreqAndPositions field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
        public virtual void TestNoNrmFile()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy;
            lmp.MergeFactor = 2;
            lmp.NoCFSRatio = 0.0;
            Document d = new Document();

            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f1 = NewField("f1", "this field has no norms", customType);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoNrm(ram);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            AssertNoNrm(ram);
            ram.Dispose();
        }
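AssertNoNrm is not shown in this excerpt; a sketch consistent with how it is used here, assuming it scans the directory listing for norms-file extensions:

        private static void AssertNoNrm(Directory dir)
        {
            foreach (string file in dir.ListAll())
            {
                // neither the legacy ".nrm" norms file nor a ".len" variant
                Assert.IsFalse(file.EndsWith(".nrm") || file.EndsWith(".len"));
            }
        }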
        public virtual void TestRollbackAndCommitWithThreads()
        {
            BaseDirectoryWrapper d = NewDirectory();
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).PreventDoubleWrite = false;
            }

            int threadCount = TestUtil.NextInt(Random(), 2, 6);

            AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            writerRef.Value = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            LineFileDocs docs = new LineFileDocs(Random());
            ThreadClass[] threads = new ThreadClass[threadCount];
            int iters = AtLeast(100);
            AtomicBoolean failed = new AtomicBoolean();
            ReentrantLock rollbackLock = new ReentrantLock();
            ReentrantLock commitLock = new ReentrantLock();
            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, d, writerRef, docs, iters, failed, rollbackLock, commitLock);
                threads[threadID].Start();
            }

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID].Join();
            }

            Assert.IsTrue(!failed.Get());
            writerRef.Value.Dispose();
            d.Dispose();
        }
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory dir = NewDirectory();
            Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);
            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader firstReader = DirectoryReader.Open(dir);
            DirectoryReader secondReader = DirectoryReader.Open(dir);
            IList<AtomicReaderContext> leaves = firstReader.Leaves;
            IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.Iterator(null);
                DocsEnum docs = null;
                BytesRef term = null;
                while ((term = iterator.Next()) != null)
                {
                    docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);

                iterator = terms.Iterator(null);
                enums.Clear();
                docs = null;
                while ((term = iterator.Next()) != null)
                {
                    docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, firstReader, secondReader, dir);
        }
        public virtual void TestPhraseQueryPositionIncrements()
        {
            PhraseQuery expected = new PhraseQuery();
            expected.Add(new Term("field", "1"));
            expected.Add(new Term("field", "2"), 2);

            CharacterRunAutomaton stopList = new CharacterRunAutomaton((new RegExp("[sS][tT][oO][pP]")).ToAutomaton());

            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
        }
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb = new StringBuilder(265);
            string term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document doc = new Document();
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), OmitType);
                doc.Add(noTf);

                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), NormalType);
                doc.Add(tf);

                writer.AddDocument(doc);
                //System.out.println(d);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             */
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);
            searcher.Similarity = new SimpleSimilarity();

            Term a = new Term("noTf", term);
            Term b = new Term("tf", term);
            Term c = new Term("noTf", "notf");
            Term d = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            PhraseQuery pq = new PhraseQuery();
            pq.Add(a);
            pq.Add(c);
            try
            {
                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            }
            catch (Exception e)
            {
                Exception cause = e;
                // If the searcher uses an executor service, the IAE is wrapped into other exceptions
                while (cause.InnerException != null)
                {
                    cause = cause.InnerException;
                }
                if (!(cause is InvalidOperationException))
                {
                    throw new InvalidOperationException("Expected an IAE", e);
                } // else OK because positions are not indexed
            }

            searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));
            //System.out.println(CountingHitCollector.getCount());

            BooleanQuery bq = new BooleanQuery();
            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestNoPrxFile()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy;
            lmp.MergeFactor = 2;
            lmp.NoCFSRatio = 0.0;
            Document d = new Document();

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f1 = NewField("f1", "this field has term freqs", ft);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoPrx(ram);

            // now add some documents with positions, and check there is no prox after optimization
            d = new Document();
            f1 = NewTextField("f1", "this field has positions", Field.Store.NO);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            AssertNoPrx(ram);
            ram.Dispose();
        }
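AssertNoPrx likewise lives outside this excerpt; a sketch under the same assumption, verifying that no proximity data was written:

        private static void AssertNoPrx(Directory dir)
        {
            foreach (string file in dir.ListAll())
            {
                // ".prx" is the 3.x positions file; ".pos" its 4.x counterpart
                Assert.IsFalse(file.EndsWith(".prx") || file.EndsWith(".pos"));
            }
        }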
        public virtual void TestBooleanSpanQuery()
        {
            bool failed = false;
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1, BooleanClause.Occur.SHOULD);
            query.Add(sq2, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(false, failed, "Bug in boolean query composed of span queries");
            Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
            directory.Dispose();
        }
        public virtual void TestTwoFieldsTwoFormats()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect!
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            DocValuesFormat fast = DocValuesFormat.ForName("Lucene45");
            DocValuesFormat slow = DocValuesFormat.ForName("Lucene45");
            iwc.SetCodec(new Lucene46CodecAnonymousInnerClassHelper(this, fast, slow));
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string text = "this is the text to be indexed. " + longTerm;
            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            doc.Add(new NumericDocValuesField("dv1", 5));
            doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            IndexSearcher isearcher = NewSearcher(ireader);

            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);
            Assert.AreEqual(1, hits.TotalHits);
            BytesRef scratch = new BytesRef();
            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
                Debug.Assert(ireader.Leaves.Count == 1);
                NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
                Assert.AreEqual(5, dv.Get(hits.ScoreDocs[i].Doc));
                BinaryDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
                dv2.Get(hits.ScoreDocs[i].Doc, scratch);
                Assert.AreEqual(new BytesRef("hello world"), scratch);
            }

            ireader.Dispose();
            directory.Dispose();
        }
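Lucene46CodecAnonymousInnerClassHelper is not shown; a hedged sketch of the per-field routing it presumably performs via Lucene46Codec's overridable hook (the class name is an assumption):

        internal sealed class TwoFormatsCodec : Lucene46Codec // hypothetical stand-in
        {
            private readonly DocValuesFormat fast;
            private readonly DocValuesFormat slow;

            public TwoFormatsCodec(DocValuesFormat fast, DocValuesFormat slow)
            {
                this.fast = fast;
                this.slow = slow;
            }

            public override DocValuesFormat GetDocValuesFormatForField(string field)
            {
                // send "dv1" to one format and everything else to the other
                return field == "dv1" ? fast : slow;
            }
        }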
        public virtual void TestSortedTermsEnum()
        {
            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();
            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;
            iwriter.Dispose();

            AtomicReader ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");
            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.TermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord());
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord());
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord());
            ireader.Dispose();
            directory.Dispose();
        }
        public virtual void TestEndOffsetPositionWithCachingTokenFilter()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document doc = new Document();
            IOException priorException = null;
            TokenStream stream = analyzer.TokenStream("field", new StringReader("abcd   "));
            try
            {
                stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
                TokenStream cachedStream = new CachingTokenFilter(stream);
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets = true;
                Field f = new Field("field", cachedStream, customType);
                doc.Add(f);
                doc.Add(f);
                w.AddDocument(doc);
            }
            catch (IOException e)
            {
                priorException = e;
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(priorException, stream);
            }
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            TermsEnum termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);
            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
            Assert.AreEqual(2, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            r.Dispose();
            dir.Dispose();
        }
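Why the second instance of the field starts at offset 8: "abcd   " is seven characters, and when the same field is added twice, the second stream's offsets are shifted by the first stream's final offset plus the analyzer's offset gap. A comment sketch of the arithmetic (assumes the default offset gap of 1):

        // instance 1: "abcd   " -> abcd @ [0,4), final offset = 7
        // instance 2: starts at 7 + 1 = 8 -> abcd @ [8,12)
        // which is exactly what the StartOffset/EndOffset assertions check.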
        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random random = new Random(Random().Next());
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);
            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;
            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",",  delIDs.ToArray()));
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!
            /*if (es != null)
            {
              es.shutdown();
              es.awaitTermination(1, TimeUnit.SECONDS);
            }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
        // Produces a realistic unicode random string that
        // survives MockAnalyzer unchanged:
        private string GetRandomTerm(string other)
        {
            Analyzer a = new MockAnalyzer(Random());
            while (true)
            {
                string s = TestUtil.RandomRealisticUnicodeString(Random());
                if (other != null && s.Equals(other))
                {
                    continue;
                }
                IOException priorException = null;
                TokenStream ts = a.TokenStream("foo", new StringReader(s));
                try
                {
                    ITermToBytesRefAttribute termAtt = ts.GetAttribute<ITermToBytesRefAttribute>();
                    BytesRef termBytes = termAtt.BytesRef;
                    ts.Reset();

                    int count = 0;
                    bool changed = false;

                    while (ts.IncrementToken())
                    {
                        termAtt.FillBytesRef();
                        if (count == 0 && !termBytes.Utf8ToString().Equals(s))
                        {
                            // The value was changed during analysis.  Keep iterating so the
                            // tokenStream is exhausted.
                            changed = true;
                        }
                        count++;
                    }

                    ts.End();
                    // Did we iterate just once and the value was unchanged?
                    if (!changed && count == 1)
                    {
                        return s;
                    }
                }
                catch (IOException e)
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
        }
        public virtual void TestGiga()
        {
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            Directory index = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), index);

            AddDoc("Lucene in Action", w);
            AddDoc("Lucene for Dummies", w);

            //addDoc("Giga", w);
            AddDoc("Giga byte", w);

            AddDoc("ManagingGigabytesManagingGigabyte", w);
            AddDoc("ManagingGigabytesManagingGigabytes", w);

            AddDoc("The Art of Computer Science", w);
            AddDoc("J. K. Rowling", w);
            AddDoc("JK Rowling", w);
            AddDoc("Joanne K Roling", w);
            AddDoc("Bruce Willis", w);
            AddDoc("Willis bruce", w);
            AddDoc("Brute willis", w);
            AddDoc("B. willis", w);
            IndexReader r = w.Reader;
            w.Dispose();

            Query q = new FuzzyQuery(new Term("field", "giga"), 0);

            // 3. search
            IndexSearcher searcher = NewSearcher(r);
            ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
            r.Dispose();
            index.Dispose();
        }
        public virtual void TestPositions()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document d = new Document();

            // f1,f2,f3: docs only
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;

            Field f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            Field f2 = NewField("f2", "this field has docs only", ft);
            d.Add(f2);

            Field f3 = NewField("f3", "this field has docs only", ft);
            d.Add(f3);

            FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);
            ft2.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;

            // f4,f5,f6 docs and freqs
            Field f4 = NewField("f4", "this field has docs and freqs", ft2);
            d.Add(f4);

            Field f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            Field f6 = NewField("f6", "this field has docs and freqs", ft2);
            d.Add(f6);

            FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);
            ft3.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            // f7,f8,f9 docs/freqs/positions
            Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);
            d.Add(f7);

            Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);
            d.Add(f8);

            Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);
            writer.ForceMerge(1);

            // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
            // and docs/freqs/positions for f3, f6, f9
            d = new Document();

            // f1,f4,f7: docs only
            f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            f4 = NewField("f4", "this field has docs only", ft);
            d.Add(f4);

            f7 = NewField("f7", "this field has docs only", ft);
            d.Add(f7);

            // f2, f5, f8: docs and freqs
            f2 = NewField("f2", "this field has docs and freqs", ft2);
            d.Add(f2);

            f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            f8 = NewField("f8", "this field has docs and freqs", ft2);
            d.Add(f8);

            // f3, f6, f9: docs and freqs and positions
            f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
            d.Add(f3);

            f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
            d.Add(f6);

            f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            // docs + docs = docs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").FieldIndexOptions);
            // docs + docs/freqs = docs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").FieldIndexOptions);
            // docs + docs/freqs/pos = docs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").FieldIndexOptions);
            // docs/freqs + docs = docs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").FieldIndexOptions);
            // docs/freqs + docs/freqs = docs/freqs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").FieldIndexOptions);
            // docs/freqs + docs/freqs/pos = docs/freqs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").FieldIndexOptions);
            // docs/freqs/pos + docs = docs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").FieldIndexOptions);
            // docs/freqs/pos + docs/freqs = docs/freqs
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").FieldIndexOptions);
            // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").FieldIndexOptions);

            reader.Dispose();
            ram.Dispose();
        }
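
The assertions above encode Lucene's merge rule for per-field IndexOptions: whenever the same field name is indexed with different options, the merged FieldInfo keeps the weakest level seen. A minimal sketch of just that rule, reusing the LuceneTestCase helpers these examples already depend on (NewDirectory, NewIndexWriterConfig, GetOnlySegmentReader, Random()):

        public virtual void IndexOptionsDowngradeSketch()
        {
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // first doc: "text" carries full positions
            FieldType full = new FieldType(TextField.TYPE_NOT_STORED);
            full.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            Document d1 = new Document();
            d1.Add(NewField("text", "positions recorded here", full));
            w.AddDocument(d1);

            // second doc: same field name, docs only
            FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED);
            docsOnly.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            Document d2 = new Document();
            d2.Add(NewField("text", "docs only here", docsOnly));
            w.AddDocument(d2);

            // merging keeps the weakest option: positions + docs = docs
            w.ForceMerge(1);
            w.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));
            Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, reader.FieldInfos.FieldInfo("text").FieldIndexOptions);
            reader.Dispose();
            dir.Dispose();
        }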
Example #26
        public virtual void TestCustomEncoder()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            config.SetSimilarity(new CustomNormEncodingSimilarity(this));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            Document doc = new Document();
            Field foo = NewTextField("foo", "", Field.Store.NO);
            Field bar = NewTextField("bar", "", Field.Store.NO);
            doc.Add(foo);
            doc.Add(bar);

            for (int i = 0; i < 100; i++)
            {
                bar.StringValue = "singleton";
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.Reader;
            writer.Dispose();

            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(0, fooNorms.Get(i));
            }

            NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(1, barNorms.Get(i));
            }

            reader.Dispose();
            dir.Dispose();
        }
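
The CustomNormEncodingSimilarity referenced above is defined elsewhere in the test class and is not shown on this page. A plausible reconstruction, following Lucene's own TestNorms and hedged as an assumption about this port's TFIDFSimilarity signatures: identity encode/decode plus a LengthNorm that returns the raw token count, so the always-empty "foo" gets norm 0 and the single-token "bar" gets norm 1, matching the two assertion loops.

        // hypothetical reconstruction, not the original class from this page
        private class IdentityNormSimilarity : TFIDFSimilarity
        {
            // store the token count itself as the norm, bypassing the usual
            // lossy float-to-byte norm encoding
            public override long EncodeNormValue(float f) { return (long)f; }
            public override float DecodeNormValue(long norm) { return norm; }
            public override float LengthNorm(FieldInvertState state) { return state.Length; }

            // scoring is irrelevant to a norms test; stub everything else out
            public override float Coord(int overlap, int maxOverlap) { return 1; }
            public override float QueryNorm(float sumOfSquaredWeights) { return 1; }
            public override float Tf(float freq) { return 0; }
            public override float Idf(long docFreq, long numDocs) { return 0; }
            public override float SloppyFreq(int distance) { return 0; }
            public override float ScorePayload(int doc, int start, int end, BytesRef payload) { return 0; }
        }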
        public virtual void Test()
        {
            MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors"));
            dir.PreventDoubleWrite = false;
            double rate = Random().NextDouble() * 0.01;
            //System.out.println("rate=" + rate);
            dir.RandomIOExceptionRateOnOpen = rate;
            int iters = AtLeast(20);
            LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            IndexReader r = null;
            DirectoryReader r2 = null;
            bool any = false;
            MockDirectoryWrapper dirCopy = null;
            int lastNumDocs = 0;
            for (int iter = 0; iter < iters; iter++)
            {
                IndexWriter w = null;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                try
                {
                    MockAnalyzer analyzer = new MockAnalyzer(Random());
                    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

                    if (VERBOSE)
                    {
                        // Do this ourselves instead of relying on LTC so
                        // we see incrementing messageID:
                        iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
                    }
                    var ms = iwc.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }
                    w = new IndexWriter(dir, iwc);
                    if (r != null && Random().Next(5) == 3)
                    {
                        if (Random().NextBoolean())
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes IR[]");
                            }
                            w.AddIndexes(new IndexReader[] { r });
                        }
                        else
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes Directory[]");
                            }
                            w.AddIndexes(new Directory[] { dirCopy });
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: addDocument");
                        }
                        w.AddDocument(docs.NextDoc());
                    }
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    w.Dispose();
                    w = null;

                    // NOTE: this is O(N^2)!  Only enable for temporary debugging:
                    //dir.RandomIOExceptionRateOnOpen = 0.0;
                    //TestUtil.CheckIndex(dir);
                    //dir.RandomIOExceptionRateOnOpen = rate;

                    // Verify numDocs only increases, to catch IndexWriter
                    // accidentally deleting the index:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    Assert.IsTrue(DirectoryReader.IndexExists(dir));
                    if (r2 == null)
                    {
                        r2 = DirectoryReader.Open(dir);
                    }
                    else
                    {
                        DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2);
                        if (r3 != null)
                        {
                            r2.Dispose();
                            r2 = r3;
                        }
                    }
                    Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs);
                    lastNumDocs = r2.NumDocs;
                    //System.out.println("numDocs=" + lastNumDocs);
                    dir.RandomIOExceptionRateOnOpen = rate;

                    any = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": success");
                    }
                }
                catch (IOException ioe)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": exception");
                        Console.WriteLine(ioe.ToString());
                        Console.Write(ioe.StackTrace);
                    }
                    if (w != null)
                    {
                        // NOTE: leave random IO exceptions enabled here,
                        // to verify that rollback does not try to write
                        // anything:
                        w.Rollback();
                    }
                }

                if (any && r == null && Random().NextBoolean())
                {
                    // Make a copy of a non-empty index so we can use
                    // it to addIndexes later:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    r = DirectoryReader.Open(dir);
                    dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
                    HashSet<string> files = new HashSet<string>();
                    foreach (string file in dir.ListAll())
                    {
                        dir.Copy(dirCopy, file, file, IOContext.DEFAULT);
                        files.Add(file);
                    }
                    dirCopy.Sync(files);
                    // Have IW kiss the dir so we remove any leftover
                    // files ... we can easily have leftover files at
                    // the time we take a copy because we are holding
                    // open a reader:
                    (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose();
                    dirCopy.RandomIOExceptionRate = rate;
                    dir.RandomIOExceptionRateOnOpen = rate;
                }
            }

            if (r2 != null)
            {
                r2.Dispose();
            }
            if (r != null)
            {
                r.Dispose();
                dirCopy.Dispose();
            }
            dir.Dispose();
        }
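
The essential fault-injection knob in the test above is MockDirectoryWrapper.RandomIOExceptionRateOnOpen. A stripped-down sketch of the same pattern, under the same test-fixture assumptions: inject open failures, roll back on the injected IOException, and verify a successful close really committed.

        public virtual void FaultInjectionSketch()
        {
            MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("faultInjectionSketch"));
            dir.RandomIOExceptionRateOnOpen = 0.5; // fail roughly half of all file opens
            IndexWriter w = null;
            try
            {
                w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                Document doc = new Document();
                doc.Add(NewTextField("f", "hello", Field.Store.NO));
                w.AddDocument(doc);
                w.Dispose();
                w = null;
                dir.RandomIOExceptionRateOnOpen = 0.0;
                Assert.IsTrue(DirectoryReader.IndexExists(dir)); // the commit survived
            }
            catch (IOException)
            {
                if (w != null)
                {
                    w.Rollback(); // must not write anything on the way out
                }
            }
            dir.Dispose();
        }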
            public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
            {
                this.OuterInstance = outerInstance;
                MyNodeID = nodeID;
                Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));
                // TODO: set warmer
                MockAnalyzer analyzer = new MockAnalyzer(Random());
                analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
                iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
                if (VERBOSE)
                {
                    iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
                }
                Writer = new IndexWriter(Dir, iwc);
                Mgr = new SearcherManager(Writer, true, null);
                Searchers = new SearcherLifetimeManager();

                // Init w/ 0s... caller above will do initial
                // "broadcast" by calling initSearcher:
                CurrentNodeVersions = new long[numNodes];
            }
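
The SearcherManager/SearcherLifetimeManager pair initialized here carries the node's search lifecycle. A hypothetical usage sketch (not part of the original class) of the acquire/release discipline, recording the point-in-time searcher so remote shards can re-request the same view by token:

            public long SearchAndRecordSketch(Query query)
            {
                IndexSearcher searcher = Mgr.Acquire();
                try
                {
                    TopDocs hits = searcher.Search(query, 10); // serve the request
                    // remember this view; other nodes can later resolve the
                    // token back to the identical searcher
                    return Searchers.Record(searcher);
                }
                finally
                {
                    Mgr.Release(searcher);
                }
            }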
        public virtual void TestRollingUpdates_Mem()
        {
            Random random = new Random(Random().Next());
            BaseDirectoryWrapper dir = NewDirectory();
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over
            //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            //{
            //    Codec.Default =
            //        TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().NextBoolean(), (float)random.NextDouble()));
            //}

            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int SIZE = AtLeast(20);
            int id = 0;
            IndexReader r = null;
            IndexSearcher s = null;
            int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;
            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc = docs.NextDoc();
                string myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            long totalBytes = 0;
            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;
            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
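
The pivot of the loop above is UpdateDocument versus AddDocument: UpdateDocument(term, doc) atomically deletes every document matching term and then adds doc, while TryDeleteDocument(reader, docID) succeeds only if the given reader still sees that exact docID. A compressed sketch of the same decision, assuming (as the test asserts) that the id matches exactly one live document:

        private void UpsertSketch(IndexWriter w, IndexReader r, IndexSearcher s, Term idTerm, Document doc)
        {
            if (s != null && w.TryDeleteDocument(r, s.Search(new TermQuery(idTerm), 1).ScoreDocs[0].Doc))
            {
                w.AddDocument(doc); // old version already deleted by docID
            }
            else
            {
                w.UpdateDocument(idTerm, doc); // delete-by-term + add, atomically
            }
        }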
        public virtual void TestTooLargeTermSortedSetBytes()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            byte[] bytes = new byte[100000];
            Random().NextBytes(bytes);
            BytesRef b = new BytesRef(bytes);
            doc.Add(new SortedSetDocValuesField("dv", b));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("did not get expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }
            iwriter.Dispose();
            directory.Dispose();
        }
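
The limit this test exercises is, in Lucene 4.x, ByteBlockPool.BYTE_BLOCK_SIZE - 2 = 32766 bytes per doc values value, so the 100,000-byte value must be rejected. A complementary sketch under the same fixture assumptions, showing the largest value that is still accepted:

        public virtual void MaxSortedSetBytesSketch()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory directory = NewDirectory();
            IndexWriter iwriter = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            // 32766 bytes is the maximum; one byte more raises ArgumentException
            doc.Add(new SortedSetDocValuesField("dv", new BytesRef(new byte[ByteBlockPool.BYTE_BLOCK_SIZE - 2])));
            iwriter.AddDocument(doc); // accepted
            iwriter.Dispose();
            directory.Dispose();
        }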
Example #31
        public virtual void Test()
        {
            Random random = new Random(Random().Next());
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            Directory d = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, analyzer);
            int numDocs = AtLeast(10);
            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.Reader;
            w.Dispose();

            List<BytesRef> terms = new List<BytesRef>();
            TermsEnum termsEnum = MultiFields.GetTerms(r, "body").Iterator(null);
            BytesRef term;
            while ((term = termsEnum.Next()) != null)
            {
                terms.Add(BytesRef.DeepCopyOf(term));
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto = -1;
            int iters = AtLeast(200);
            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && Random().NextBoolean())
                {
                    // next
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.Next() == null;
                    upto++;
                    if (isEnd)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term().Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term());
                    }
                }
                else
                {
                    BytesRef target;
                    string exists;
                    if (Random().NextBoolean())
                    {
                        // likely fake term
                        if (Random().NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(Random()));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[Random().Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (Random().NextBoolean())
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term());
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term());
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term());
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
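
The glue making the random walk above work is List&lt;T&gt;.BinarySearch's return convention: the match index when the target exists, otherwise the bitwise complement of the insertion point, which maps one-to-one onto SeekCeil's FOUND / NOT_FOUND / END outcomes. In miniature:

        // sketch of the BinarySearch contract the assertions rely on
        List<BytesRef> sorted = new List<BytesRef> { new BytesRef("apple"), new BytesRef("pear") };
        int found = sorted.BinarySearch(new BytesRef("apple"));   // 0 -> SeekStatus.FOUND
        int missing = sorted.BinarySearch(new BytesRef("mango")); // complement of 1, i.e. -(1 + 1) = -2
        int ceiling = ~missing;                                   // insertion point 1
        // SeekCeil("mango") lands NOT_FOUND on sorted[1] == "pear";
        // ceiling == sorted.Count would instead mean SeekStatus.END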