Example #1
        public virtual void TestRollbackIntegrityWithBufferFlush()
        {
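            // Build a 5-doc index, reopen it for append with a tiny RAM buffer so
            // the updates flush mid-stream, roll back, and confirm none of the
            // rolled-back updates are visible.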
            Directory dir = NewDirectory();
            RandomIndexWriter rw = new RandomIndexWriter(Random(), dir);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
                rw.AddDocument(doc);
            }
            rw.Dispose();

            // If buffer size is small enough to cause a flush, errors ensue...
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                string value = Convert.ToString(i);
                doc.Add(NewStringField("pk", value, Field.Store.YES));
                doc.Add(NewStringField("text", "foo", Field.Store.YES));
                w.UpdateDocument(new Term("pk", value), doc);
            }
            w.Rollback();

            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
            r.Dispose();
            dir.Dispose();
        }
Example #2
 public virtual void Test()
 {
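      // Index 100 docs under the Lucene46 codec with random intermediate commits,
      // then run CheckHeaders (presumably a codec file-header validator defined by
      // the surrounding test class) over the resulting directory.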
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
         // TODO: we should make a new format with a clean header...
         // if (Random().nextInt(20) == 0) {
         //  riw.DeleteDocuments(new Term("id", Integer.toString(i)));
         // }
     }
     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
Example #3
        public virtual void TestAddBinaryTwice()
        {
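            // Adding two doc values fields with the same name to one document is
            // illegal; AddDocument must throw ArgumentException.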
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect!
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
Example #4
        public static void BeforeClass()
        {
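            // Shared fixture: one doc with several text fields ("repeated" is added
            // twice), plus two identical docs in the "nonexist" field.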
            Directory = NewDirectory();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, analyzer);

            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("field", "one two three four five", Field.Store.YES));
            doc.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
            IndexableField repeatedField = NewTextField("repeated", "second part of a repeated field", Field.Store.YES);
            doc.Add(repeatedField);
            doc.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            writer.AddDocument(doc);

            Reader = writer.Reader;
            writer.Dispose();

            Searcher = NewSearcher(Reader);
        }
Example #5
 public override void SetUp()
 {
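      // Fixture: at least 2000 docs; every 5th doc lacks the first sort field,
      // every 7th matches the "content:test" query, and all docs get a "mandant".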
     base.SetUp();
     INDEX_SIZE = AtLeast(2000);
     Index = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
     RandomGen random = new RandomGen(this, Random());
      for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the problem doesn't show up
      {
          Document doc = new Document();
          if ((i % 5) != 0) // some documents must not have an entry in the first sort field
          {
              doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
          }
         if ((i % 7) == 0) // some documents to match the query (see below)
         {
             doc.Add(NewTextField("content", "test", Field.Store.YES));
         }
         // every document has a defined 'mandant' field
         doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Query = new TermQuery(new Term("content", "test"));
 }
Example #6
        public override void SetUp()
        {
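            // Two single-doc indexes that share field1 but differ in their second
            // field (field2 vs field3), each wrapped as an atomic reader.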
            base.SetUp();
            Document doc;
            Rd1 = NewDirectory();
            IndexWriter iw1 = new IndexWriter(Rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the quick brown fox jumps", Field.Store.YES));
            doc.Add(NewTextField("field2", "the quick brown fox jumps", Field.Store.YES));
            iw1.AddDocument(doc);

            iw1.Dispose();
            Rd2 = NewDirectory();
            IndexWriter iw2 = new IndexWriter(Rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the fox jumps over the lazy dog", Field.Store.YES));
            doc.Add(NewTextField("field3", "the fox jumps over the lazy dog", Field.Store.YES));
            iw2.AddDocument(doc);

            iw2.Dispose();

            this.Ir1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd1));
            this.Ir2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd2));
        }
Example #7
        public virtual void TestByte()
        {
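            // Sort three docs by a numeric doc values field as BYTE and verify they
            // come back in numeric order without touching the FieldCache.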
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(new NumericDocValuesField("value", 23));
            doc.Add(NewStringField("value", "23", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new NumericDocValuesField("value", -1));
            doc.Add(NewStringField("value", "-1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new NumericDocValuesField("value", 4));
            doc.Add(NewStringField("value", "4", Field.Store.YES));
            writer.AddDocument(doc);
            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortField("value", SortField.Type_e.BYTE));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            Assert.AreEqual(3, td.TotalHits);
            // numeric order
            Assert.AreEqual("-1", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
            Assert.AreEqual("4", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
            Assert.AreEqual("23", searcher.Doc(td.ScoreDocs[2].Doc).Get("value"));
            AssertNoFieldCaches();

            ir.Dispose();
            dir.Dispose();
        }
Example #8
        public virtual void TestDoubleOffsetCounting()
        {
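            // The same field instance is added three times around an empty-valued
            // one; term vector offsets must keep accumulating across the instances.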
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;
            Field f = NewField("field", "abcd", customType);
            doc.Add(f);
            doc.Add(f);
            Field f2 = NewField("field", "", customType);
            doc.Add(f2);
            doc.Add(f);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            Terms vector = r.GetTermVectors(0).Terms("field");
            Assert.IsNotNull(vector);
            TermsEnum termsEnum = vector.Iterator(null);
            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("", termsEnum.Term().Utf8ToString());

            // Token "" occurred once
            Assert.AreEqual(1, termsEnum.TotalTermFreq());

            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Token "abcd" occurred three times
            Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.AreEqual(3, termsEnum.TotalTermFreq());

            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset());
            Assert.AreEqual(4, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(4, dpEnum.StartOffset());
            Assert.AreEqual(8, dpEnum.EndOffset());

            dpEnum.NextPosition();
            Assert.AreEqual(8, dpEnum.StartOffset());
            Assert.AreEqual(12, dpEnum.EndOffset());

            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            Assert.IsNull(termsEnum.Next());
            r.Dispose();
            dir.Dispose();
        }
Example #9
        /*
          Run one indexer and 2 searchers against a single index as
          a stress test.
        */
        public virtual void RunTest(Directory directory)
        {
            TimedThread[] threads = new TimedThread[4];

            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7);
            ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3;
            IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random());

            // Establish a base index of 100 docs:
            for (int i = 0; i < 100; i++)
            {
                Documents.Document d = new Documents.Document();
                d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES));
                d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO));
                if ((i - 1) % 7 == 0)
                {
                    writer.Commit();
                }
                writer.AddDocument(d);
            }
            writer.Commit();

            IndexReader r = DirectoryReader.Open(directory);
            Assert.AreEqual(100, r.NumDocs);
            r.Dispose();

            IndexerThread indexerThread = new IndexerThread(writer, threads);
            threads[0] = indexerThread;
            indexerThread.Start();

            IndexerThread indexerThread2 = new IndexerThread(writer, threads);
            threads[1] = indexerThread2;
            indexerThread2.Start();

            SearcherThread searcherThread1 = new SearcherThread(directory, threads);
            threads[2] = searcherThread1;
            searcherThread1.Start();

            SearcherThread searcherThread2 = new SearcherThread(directory, threads);
            threads[3] = searcherThread2;
            searcherThread2.Start();

            indexerThread.Join();
            indexerThread2.Join();
            searcherThread1.Join();
            searcherThread2.Join();

            writer.Dispose();

            Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer");
            Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2");
            Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1");
            Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2");
            //System.out.println("    Writer: " + indexerThread.count + " iterations");
            //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
            //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
        }
Example #10
        public virtual void TestForceMergeDeletes()
        {
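            // Index 10 docs, delete 2 with merging disabled, then ForceMergeDeletes()
            // and verify MaxDoc drops from 10 to 8 as the deletes are reclaimed.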
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
            Document document = new Document();

            FieldType customType = new FieldType();
            customType.Stored = true;

            FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
            customType1.Tokenized = false;
            customType1.StoreTermVectors = true;
            customType1.StoreTermVectorPositions = true;
            customType1.StoreTermVectorOffsets = true;

            Field idField = NewStringField("id", "", Field.Store.NO);
            document.Add(idField);
            Field storedField = NewField("stored", "stored", customType);
            document.Add(storedField);
            Field termVectorField = NewField("termVector", "termVector", customType1);
            document.Add(termVectorField);
            for (int i = 0; i < 10; i++)
            {
                idField.StringValue = "" + i;
                writer.AddDocument(document);
            }
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);
            Assert.AreEqual(10, ir.MaxDoc);
            Assert.AreEqual(10, ir.NumDocs);
            ir.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            writer = new IndexWriter(dir, dontMergeConfig);
            writer.DeleteDocuments(new Term("id", "0"));
            writer.DeleteDocuments(new Term("id", "7"));
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(8, ir.NumDocs);
            ir.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            Assert.AreEqual(8, writer.NumDocs());
            Assert.AreEqual(10, writer.MaxDoc);
            writer.ForceMergeDeletes();
            Assert.AreEqual(8, writer.NumDocs());
            writer.Dispose();
            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(8, ir.MaxDoc);
            Assert.AreEqual(8, ir.NumDocs);
            ir.Dispose();
            dir.Dispose();
        }
Example #11
        public virtual void TestAfter()
        {
            // create an index
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore);

            long now = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;

            Document doc = new Document();
            // add time that is in the future
            doc.Add(NewStringField("datefield", DateTools.TimeToString(now + 888888, DateTools.Resolution.MILLISECOND), Field.Store.YES));
            doc.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader reader = writer.Reader;
            writer.Dispose();
            IndexSearcher searcher = NewSearcher(reader);

            // filter that should preserve matches
            // DateFilter df1 = DateFilter.After("datefield", now);
            TermRangeFilter df1 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, false);
            // filter that should discard matches
            // DateFilter df2 = DateFilter.After("datefield", now + 999999);
            TermRangeFilter df2 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now + 999999, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now + 999999999, DateTools.Resolution.MILLISECOND), false, true);

            // search something that doesn't exist with DateFilter
            Query query1 = new TermQuery(new Term("body", "NoMatchForthis"));

            // search for something that does exists
            Query query2 = new TermQuery(new Term("body", "sunny"));

            ScoreDoc[] result;

            // ensure that queries return expected results without DateFilter first
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);

            // run queries with DateFilter
            result = searcher.Search(query1, df1, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query1, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query2, df1, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);

            result = searcher.Search(query2, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }
Example #12
        public virtual void Test()
        {
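            // Build a single-segment index with numeric, binary, and sorted doc
            // values, then have several threads read the values concurrently and
            // check them against the expected lists.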
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            IList<long?> numbers = new List<long?>();
            IList<BytesRef> binary = new List<BytesRef>();
            IList<BytesRef> sorted = new List<BytesRef>();
            int numDocs = AtLeast(100);
            for (int i = 0; i < numDocs; i++)
            {
                Document d = new Document();
                long number = Random().NextLong();
                d.Add(new NumericDocValuesField("number", number));
                BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                d.Add(new BinaryDocValuesField("bytes", bytes));
                binary.Add(bytes);
                bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                d.Add(new SortedDocValuesField("sorted", bytes));
                sorted.Add(bytes);
                w.AddDocument(d);
                numbers.Add(number);
            }

            w.ForceMerge(1);
            IndexReader r = w.Reader;
            w.Dispose();

            Assert.AreEqual(1, r.Leaves.Count);
            AtomicReader ar = (AtomicReader)r.Leaves[0].Reader;

            int numThreads = TestUtil.NextInt(Random(), 2, 5);
            IList<ThreadClass> threads = new List<ThreadClass>();
            CountDownLatch startingGun = new CountDownLatch(1);
            for (int t = 0; t < numThreads; t++)
            {
                Random threadRandom = new Random(Random().Next());
                ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom);
                thread.Start();
                threads.Add(thread);
            }

            startingGun.countDown();

            foreach (ThreadClass thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
Example #13
        public override void SetUp()
        {
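            // Fixture: spread NUM_DOCS docs with descending numeric-style string
            // fields across two directories, then build plain, clone, and
            // MultiReader views for the FieldCache sanity tests.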
            base.SetUp();
            DirA = NewDirectory();
            DirB = NewDirectory();

            IndexWriter wA = new IndexWriter(DirA, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            IndexWriter wB = new IndexWriter(DirB, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            long theLong = long.MaxValue;
            double theDouble = double.MaxValue;
            sbyte theByte = sbyte.MaxValue;
            short theShort = short.MaxValue;
            int theInt = int.MaxValue;
            float theFloat = float.MaxValue;
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
                doc.Add(NewStringField("theDouble", theDouble.ToString("R"), Field.Store.NO));
                theDouble--;
                doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
                doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
                doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
                doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
                if (0 == i % 3)
                {
                    wA.AddDocument(doc);
                }
                else
                {
                    wB.AddDocument(doc);
                }
            }
            wA.Dispose();
            wB.Dispose();
            DirectoryReader rA = DirectoryReader.Open(DirA);
            ReaderA = SlowCompositeReaderWrapper.Wrap(rA);
            ReaderAclone = SlowCompositeReaderWrapper.Wrap(rA);
            ReaderA = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirA));
            ReaderB = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirB));
            ReaderX = SlowCompositeReaderWrapper.Wrap(new MultiReader(ReaderA, ReaderB));

            // LUCENENET specific. Ensure we have an InfoStream attached to the default FieldCache
            // when running the tests. In Java, this was done in the Core.Search.TestFieldCache.TestInfoStream()
            // method (which polluted the state of these tests), but we need to make the tests self-contained
            // so they can be run correctly regardless of order. Not setting the InfoStream skips an execution
            // path within these tests, so we should do it to make sure we test all of the code.
            FieldCache.DEFAULT.InfoStream = new StringWriter();
        }
Example #14
        public virtual void TestMixedMerge()
        {
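            // Once any segment omits norms for a field, merging must keep norms
            // omitted; here both f1 and f2 end up with the omitNorms bit set.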
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2)));
            Document d = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
            d.Add(f1);

            // this field will NOT have norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // now we add another document that has norms for field f2 but not for f1,
            // and verify that the SegmentMerger keeps things constant
            d = new Document();

            // Reversed
            d.Add(NewField("f1", "this field has norms", customType));

            d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
Example #15
 public override void SetUp()
 {
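      // Fixture: 100 docs where "field" holds the doc number and "field2" holds
      // whether that number is even.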
     base.SetUp();
     Dir = NewDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
     for (int i = 0; i < 100; i++)
     {
         Document doc = new Document();
         doc.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO));
         doc.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO));
         iw.AddDocument(doc);
     }
     Reader = iw.Reader;
     iw.Dispose();
 }
Example #16
        public virtual void TestBinaryFieldInIndex()
        {
            FieldType ft = new FieldType();
            ft.Stored = true;
            IndexableField binaryFldStored = new StoredField("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(BinaryValStored));
            IndexableField stringFldStored = new Field("stringStored", BinaryValStored, ft);

            Documents.Document doc = new Documents.Document();

            doc.Add(binaryFldStored);

            doc.Add(stringFldStored);

            // test for field count
            Assert.AreEqual(2, doc.Fields.Count);

            // add the doc to a RAM index
            Directory dir = NewDirectory();
            Random r = Random();
            RandomIndexWriter writer = new RandomIndexWriter(r, dir);
            writer.AddDocument(doc);

            // open a reader and fetch the document
            IndexReader reader = writer.Reader;
            Documents.Document docFromReader = reader.Document(0);
            Assert.IsTrue(docFromReader != null);

            // fetch the binary stored field and compare its content with the original one
            BytesRef bytes = docFromReader.GetBinaryValue("binaryStored");
            Assert.IsNotNull(bytes);

            string binaryFldStoredTest = Encoding.UTF8.GetString((byte[])(Array)bytes.Bytes).Substring(bytes.Offset, bytes.Length);
            //new string(bytes.Bytes, bytes.Offset, bytes.Length, IOUtils.CHARSET_UTF_8);
            Assert.IsTrue(binaryFldStoredTest.Equals(BinaryValStored));

            // fetch the string field and compare its content with the original one
            string stringFldStoredTest = docFromReader.Get("stringStored");
            Assert.IsTrue(stringFldStoredTest.Equals(BinaryValStored));

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #17
        public virtual void TestInfoStreamGetsFieldName()
        {
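            // A throwing analyzer aborts AddDocument; the InfoStream output must
            // still name the field being processed when the failure happened.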
            Directory dir = NewDirectory();
            IndexWriter writer;
            IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
            ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
            StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8);
            PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
            c.InfoStream = printStreamInfoStream;
            writer = new IndexWriter(dir, c);
            Document doc = new Document();
            doc.Add(NewField("distinctiveFieldName", "aaa ", StoredTextType));
            try
            {
                writer.AddDocument(doc);
                Assert.Fail("Failed to fail.");
            }
            catch (BadNews badNews)
            {
                infoPrintStream.Flush();
                string infoStream = Encoding.UTF8.GetString(infoBytes.GetBuffer());
                Assert.IsTrue(infoStream.Contains("distinctiveFieldName"));
            }

            writer.Dispose();
            dir.Dispose();
        }
Example #18
        public virtual void TestStartPositions()
        {
            Directory dir = NewDirectory();

            // mimic StopAnalyzer
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer);
            Document doc = new Document();
            doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
            writer.AddDocument(doc);
            Document doc2 = new Document();
            doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
            writer.AddDocument(doc2);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // user queries on "starts-with quick"
            SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            // user queries on "starts-with the quick"
            SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
            sfq = new SpanNotQuery(include, sfq);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #19
        public virtual void TestBasic()
        {
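            // The field is indexed with DOCS_AND_FREQS only, so positions must be
            // absent (null enum) while frequencies are still readable.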
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);
            doc.Add(f);
            for (int i = 0; i < 100; i++)
            {
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #20
        public virtual void TestBackToTheFuture()
        {
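            // FieldCache doc-term-ords are keyed per reader: the entry built for r2
            // (after the delete) must not bleed into r1, which still sees both terms.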
            Directory dir = NewDirectory();
            IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            Document doc = new Document();
            doc.Add(NewStringField("foo", "bar", Field.Store.NO));
            iw.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("foo", "baz", Field.Store.NO));
            iw.AddDocument(doc);

            DirectoryReader r1 = DirectoryReader.Open(iw, true);

            iw.DeleteDocuments(new Term("foo", "baz"));
            DirectoryReader r2 = DirectoryReader.Open(iw, true);

            FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r2), "foo");

            SortedSetDocValues v = FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r1), "foo");
            Assert.AreEqual(2, v.ValueCount);
            v.Document = 1;
            Assert.AreEqual(1, v.NextOrd());

            iw.Dispose();
            r1.Dispose();
            r2.Dispose();
            dir.Dispose();
        }
Example #21
        public virtual void TestDateCompression()
        {
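            // The values are whole-day multiples plus a small prime base, so the
            // codec should store them much more cheaply than raw packed longs
            // (presumably via its GCD compression of numeric doc values).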
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long day = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs cost less than if they had only been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
Example #22
        public virtual void TestPrefixQuery_Mem()
        {
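            // An empty-prefix query must rewrite to enumerate all terms rather than
            // use PrefixTermsEnum, and still match every document.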
            Directory directory = NewDirectory();

            string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
            for (int i = 0; i < categories.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("category", categories[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.Reader;

            PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

            query = new PrefixQuery(new Term("category", "/Computers/Mac"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

            query = new PrefixQuery(new Term("category", ""));
            Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
            Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "everything");
            writer.Dispose();
            reader.Dispose();
            directory.Dispose();
        }
Example #23
        public static void BeforeClass()
        {
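            // Fixture: over 11,000 docs share one Field instance whose value is
            // switched to "tangfulin" for exactly two of them.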
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);

            Document doc = new Document();
            Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
            doc.Add(field);

            for (int i = 0; i < 5137; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            field.StringValue = "meaninglessnames";
            for (int i = 5138; i < 11377; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Example #24
        public virtual void TestConstantScoreQueryAndFilter()
        {
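            // A ConstantScoreQuery over a cached filter must still be intersected
            // with the separately supplied search-time filter.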
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d);
            Document doc = new Document();
            doc.Add(NewStringField("field", "a", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("field", "b", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();

            Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
            Query query = new ConstantScoreQuery(filterB);

            IndexSearcher s = NewSearcher(r);
            Assert.AreEqual(1, s.Search(query, filterB, 1).TotalHits); // Query for field:b, Filter field:b

            Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
            query = new ConstantScoreQuery(filterA);

            Assert.AreEqual(0, s.Search(query, filterB, 1).TotalHits); // Query field:b, Filter field:a

            r.Dispose();
            d.Dispose();
        }
Example #25
        public override void SetUp()
        {
            base.SetUp();
            // we generate awful regexps: good for testing.
            // but for the preflex codec the test can be very slow, so use fewer iterations.
            NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.YES);
            doc.Add(field);
            Terms = new SortedSet<BytesRef>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                Terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Example #26
        public virtual void TestAddIndexes()
        {
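            // AddIndexes must preserve the external segment's own field numbering
            // (f2 before f1) instead of remapping it to the target index's order.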
            Directory dir1 = NewDirectory();
            Directory dir2 = NewDirectory();
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d1 = new Document();
            d1.Add(new TextField("f1", "first field", Field.Store.YES));
            d1.Add(new TextField("f2", "second field", Field.Store.YES));
            writer.AddDocument(d1);

            writer.Dispose();
            writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d2 = new Document();
            FieldType customType2 = new FieldType(TextField.TYPE_STORED);
            customType2.StoreTermVectors = true;
            d2.Add(new TextField("f2", "second field", Field.Store.YES));
            d2.Add(new Field("f1", "first field", customType2));
            d2.Add(new TextField("f3", "third field", Field.Store.YES));
            d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
            writer.AddDocument(d2);

            writer.Dispose();

            writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            writer.AddIndexes(dir2);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir1);
            Assert.AreEqual(2, sis.Size());

            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            // make sure the ordering of the "external" segment is preserved
            Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

            dir1.Dispose();
            dir2.Dispose();
        }
Example #27
        public virtual void TestBinaryField()
        {
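            // Exercises stored binary vs. string fields on Document: field counts,
            // typed getters, multi-valued GetBinaryValues, and field removal.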
            Documents.Document doc = new Documents.Document();

            FieldType ft = new FieldType();
            ft.Stored = true;
            IndexableField stringFld = new Field("string", BinaryVal, ft);
            IndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
            IndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

            doc.Add(stringFld);
            doc.Add(binaryFld);

            Assert.AreEqual(2, doc.Fields.Count);

            Assert.IsTrue(binaryFld.BinaryValue != null);
            Assert.IsTrue(binaryFld.FieldType.Stored);
            Assert.IsFalse(binaryFld.FieldType.Indexed);

            string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();
            Assert.IsTrue(binaryTest.Equals(BinaryVal));

            string stringTest = doc.Get("string");
            Assert.IsTrue(binaryTest.Equals(stringTest));

            doc.Add(binaryFld2);

            Assert.AreEqual(3, doc.Fields.Count);

            BytesRef[] binaryTests = doc.GetBinaryValues("binary");

            Assert.AreEqual(2, binaryTests.Length);

            binaryTest = binaryTests[0].Utf8ToString();
            string binaryTest2 = binaryTests[1].Utf8ToString();

            Assert.IsFalse(binaryTest.Equals(binaryTest2));

            Assert.IsTrue(binaryTest.Equals(BinaryVal));
            Assert.IsTrue(binaryTest2.Equals(BinaryVal2));

            doc.RemoveField("string");
            Assert.AreEqual(2, doc.Fields.Count);

            doc.RemoveFields("binary");
            Assert.AreEqual(0, doc.Fields.Count);
        }
Example #28
        public virtual void TestFixedBinary()
        {
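            // Stress test: one 4-byte binary doc value per doc for int.MaxValue
            // docs, then verify every value after ForceMerge. Very slow; requires
            // an unthrottled directory.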
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
               .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document doc = new Document();
            var bytes = new byte[4];
            BytesRef data = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 24);
                bytes[1] = (byte)(i >> 16);
                bytes[2] = (byte)(i >> 8);
                bytes[3] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues dv = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 24);
                    bytes[1] = (byte)(expectedValue >> 16);
                    bytes[2] = (byte)(expectedValue >> 8);
                    bytes[3] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Example #29
 private Document Adoc(string[] vals)
 {
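      // Builds a document from alternating field-name/value pairs. Note the
      // "vals.Length - 2" bound: as written, the final pair is never added.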
     Document doc = new Document();
     for (int i = 0; i < vals.Length - 2; i += 2)
     {
         doc.Add(NewTextField(vals[i], vals[i + 1], Field.Store.YES));
     }
     return doc;
 }
Example #30
        public virtual void TestSumDocFreq_Mem()
        {
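            // Random two-term docs across two fields; AssertSumDocFreq must hold
            // both before and after random deletes followed by a ForceMerge.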
            int numDocs = AtLeast(500);

            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            Field id = NewStringField("id", "", Field.Store.NO);
            Field field1 = NewTextField("foo", "", Field.Store.NO);
            Field field2 = NewTextField("bar", "", Field.Store.NO);
            doc.Add(id);
            doc.Add(field1);
            doc.Add(field2);
            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue = "" + i;
                char ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                char ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                field1.StringValue = "" + ch1 + " " + ch2;
                ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                field2.StringValue = "" + ch1 + " " + ch2;
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;

            AssertSumDocFreq(ir);
            ir.Dispose();

            int numDeletions = AtLeast(20);
            for (int i = 0; i < numDeletions; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + Random().Next(numDocs)));
            }
            writer.ForceMerge(1);
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            AssertSumDocFreq(ir);
            ir.Dispose();
            dir.Dispose();
        }
Example #31
        private void CreateIndex(int numHits)
        {
            int numDocs = 500;

            Analyzer  analyzer  = new AnalyzerAnonymousInnerClassHelper(this);
            Directory directory = new SeekCountingDirectory(this, new RAMDirectory());
            // note: test explicitly disables payloads
            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(false)));

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                string   content;
                if (i % (numDocs / numHits) == 0)
                {
                    // add a document that matches the query "term1 term2"
                    content = this.Term1 + " " + this.Term2;
                }
                else if (i % 15 == 0)
                {
                    // add a document that only contains term1
                    content = this.Term1 + " " + this.Term1;
                }
                else
                {
                    // add a document that contains term2 but not term 1
                    content = this.Term3 + " " + this.Term2;
                }

                doc.Add(NewTextField(this.Field, content, Documents.Field.Store.YES));
                writer.AddDocument(doc);
            }

            // make sure the index has only a single segment
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(directory));

            this.Searcher = NewSearcher(reader);
        }
Example #32
        public static void MakeIndex()
        {
            Dir = new RAMDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            // make sure we have more than one segment occasionally
            int num = AtLeast(31);

            for (int i = 0; i < num; i++)
            {
                Documents.Document doc = new Documents.Document();
                doc.Add(NewTextField("f", "a b c d b c d c d d", Field.Store.NO));
                w.AddDocument(doc);

                doc = new Documents.Document();
                doc.Add(NewTextField("f", "a b c d", Field.Store.NO));
                w.AddDocument(doc);
            }

            s = NewSearcher(w.Reader);
            w.Dispose();
        }
Example #33
        public virtual void TestTermDocsEnum()
        {
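            // The commit between the two adds yields two segments; the MultiFields
            // docs enum must iterate seamlessly across both.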
            Directory   dir = NewDirectory();
            IndexWriter w   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            Documents.Document d = new Documents.Document();
            d.Add(NewStringField("f", "j", Field.Store.NO));
            w.AddDocument(d);
            w.Commit();
            w.AddDocument(d);
            IndexReader r = w.GetReader();

            w.Dispose();
            DocsEnum de = MultiFields.GetTermDocsEnum(r, null, "f", new BytesRef("j"));

            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc());
            r.Dispose();
            dir.Dispose();
        }
Example #34
        public virtual void TestSeparateEnums()
        {
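            // Two DocsEnum instances over the same term must iterate independently
            // of one another.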
            Directory   dir = NewDirectory();
            IndexWriter w   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            Documents.Document d = new Documents.Document();
            d.Add(NewStringField("f", "j", Field.Store.NO));
            w.AddDocument(d);
            w.Commit();
            w.AddDocument(d);
            IndexReader r = w.GetReader();

            w.Dispose();
            DocsEnum d1 = TestUtil.Docs(Random, r, "f", new BytesRef("j"), null, null, DocsFlags.NONE);
            DocsEnum d2 = TestUtil.Docs(Random, r, "f", new BytesRef("j"), null, null, DocsFlags.NONE);

            Assert.AreEqual(0, d1.NextDoc());
            Assert.AreEqual(0, d2.NextDoc());
            r.Dispose();
            dir.Dispose();
        }
Example #35
        public virtual void TestMaxMergeCount()
        {
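            // Keep adding docs until maxMergeCount merges have queued up (tracked
            // by the anonymous scheduler via enoughMergesWaiting) or a check fails.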
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            int maxMergeCount = TestUtil.NextInt32(Random, 1, 5);
            int maxMergeThreads = TestUtil.NextInt32(Random, 1, maxMergeCount);
            CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount);
            AtomicInt32 runningMergeCount = new AtomicInt32(0);
            AtomicBoolean failed = new AtomicBoolean();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
            }

            ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed);
            cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
            iwc.SetMergeScheduler(cms);
            iwc.SetMaxBufferedDocs(2);

            TieredMergePolicy tmp = new TieredMergePolicy();
            iwc.SetMergePolicy(tmp);
            tmp.MaxMergeAtOnce = 2;
            tmp.SegmentsPerTier = 2;

            IndexWriter w = new IndexWriter(dir, iwc);
            Document doc = new Document();
            doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED));
            while (enoughMergesWaiting.CurrentCount != 0 && !failed)
            {
                for (int i = 0; i < 10; i++)
                {
                    w.AddDocument(doc);
                }
            }
            w.Dispose(false);
            dir.Dispose();
        }
Example #36
        public override void BeforeClass() // LUCENENET specific - renamed from MakeIndex() to ensure calling order
        {
            base.BeforeClass();

            Dir = new RAMDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            // make sure we have more than one segment occasionally
            int num = AtLeast(31);

            for (int i = 0; i < num; i++)
            {
                Documents.Document doc = new Documents.Document();
                doc.Add(NewTextField("f", "a b c d b c d c d d", Field.Store.NO));
                w.AddDocument(doc);

                doc = new Documents.Document();
                doc.Add(NewTextField("f", "a b c d", Field.Store.NO));
                w.AddDocument(doc);
            }

            s = NewSearcher(w.Reader);
            w.Dispose();
        }
Example #37
        public virtual void TestNoOrds()
        {
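            // Term vector TermsEnums have no ord support: both Ord and
            // SeekExact(long) must throw UnsupportedOperationException.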
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.StoreTermVectors = true;
            doc.Add(new Field("foo", "this is a test", ft));
            iw.AddDocument(doc);
            AtomicReader ir = GetOnlySegmentReader(iw.GetReader());
            Terms terms = ir.GetTermVector(0, "foo");
            Assert.IsNotNull(terms);
            TermsEnum termsEnum = terms.GetEnumerator();
            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));
            try
            {
                var _ = termsEnum.Ord;
                Assert.Fail();
            }
            catch (Exception expected) when (expected.IsUnsupportedOperationException())
            {
                // expected exception
            }

            try
            {
                termsEnum.SeekExact(0);
                Assert.Fail();
            }
            catch (Exception expected) when (expected.IsUnsupportedOperationException())
            {
                // expected exception
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Example #38
        public virtual void TestTransitionAPI()
        {
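            // Exercises the pre-4.0 Field constructor API end to end: stored-only,
            // indexed, tokenized, reader/token-stream, binary, and term vector
            // variants must all still index and search correctly.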
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");

            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);

            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.Terms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Size());
                TermsEnum tvsEnum = tvs.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv"))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
Example #39
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
            w.AddDocument(doc);
            IndexReader r = w.GetReader();
            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");
            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.GetTerms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Count);
                TermsEnum tvsEnum = tvs.GetEnumerator();
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Term);
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv", StringComparison.Ordinal))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Term);
                Assert.IsFalse(tvsEnum.MoveNext());
            }

            r.Dispose();
            dir.Dispose();
        }
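Both TestTransitionAPI variants deliberately exercise the deprecated 3.x Field constructors. For orientation only, a hedged sketch of the 4.x sugar types those constructors map onto (field names are carried over from the test; this mapping is the standard one, not code from the example):

            var modern = new Documents.Document();
            modern.Add(new StoredField("stored", "abc"));                              // Store.YES + Index.NO
            modern.Add(new StringField("stored_indexed", "abc xyz", Field.Store.YES)); // NOT_ANALYZED
            modern.Add(new TextField("stored_tokenized", "abc xyz", Field.Store.YES)); // ANALYZED
            modern.Add(new StoredField("binary", new byte[10]));                       // stored binary value

The term-vector fields (tv, tv_pos, tv_off, tv_pos_off) have no dedicated sugar type; they take a FieldType with StoreTermVectors, StoreTermVectorPositions, and/or StoreTermVectorOffsets enabled.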
Example #40
        public virtual void TestRandomPhrases()
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = new MockAnalyzer(Random);

            RandomIndexWriter       w    = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
            IList <IList <string> > docs = new List <IList <string> >();

            Documents.Document d = new Documents.Document();
            Field f = NewTextField("f", "", Field.Store.NO);

            d.Add(f);

            Random r = Random;

            int NUM_DOCS = AtLeast(10);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                // must be > 4096 so it spans multiple chunks
                int termCount = TestUtil.NextInt32(Random, 4097, 8200);

                IList <string> doc = new List <string>();

                StringBuilder sb = new StringBuilder();
                while (doc.Count < termCount)
                {
                    if (r.Next(5) == 1 || docs.Count == 0)
                    {
                        // make new non-empty-string term
                        string term;
                        while (true)
                        {
                            term = TestUtil.RandomUnicodeString(r);
                            if (term.Length > 0)
                            {
                                break;
                            }
                        }
                        Exception   priorException = null; // LUCENENET: No need to cast to IOException
                        TokenStream ts             = analyzer.GetTokenStream("ignore", new StringReader(term));
                        try
                        {
                            ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>();
                            ts.Reset();
                            while (ts.IncrementToken())
                            {
                                string text = termAttr.ToString();
                                doc.Add(text);
                                sb.Append(text).Append(' ');
                            }
                            ts.End();
                        }
                        catch (Exception e) when(e.IsIOException())
                        {
                            priorException = e;
                        }
                        finally
                        {
                            IOUtils.DisposeWhileHandlingException(priorException, ts);
                        }
                    }
                    else
                    {
                        // pick existing sub-phrase
                        IList <string> lastDoc = docs[r.Next(docs.Count)];
                        int            len     = TestUtil.NextInt32(r, 1, 10);
                        int            start   = r.Next(lastDoc.Count - len);
                        for (int k = start; k < start + len; k++)
                        {
                            string t = lastDoc[k];
                            doc.Add(t);
                            sb.Append(t).Append(' ');
                        }
                    }
                }
                docs.Add(doc);
                f.SetStringValue(sb.ToString());
                w.AddDocument(d);
            }

            IndexReader   reader = w.GetReader();
            IndexSearcher s      = NewSearcher(reader);

            w.Dispose();

            // now search
            int num = AtLeast(10);

            for (int i = 0; i < num; i++)
            {
                int            docID = r.Next(docs.Count);
                IList <string> doc   = docs[docID];

                int           numTerm = TestUtil.NextInt32(r, 2, 20);
                int           start   = r.Next(doc.Count - numTerm);
                PhraseQuery   pq      = new PhraseQuery();
                StringBuilder sb      = new StringBuilder();
                for (int t = start; t < start + numTerm; t++)
                {
                    pq.Add(new Term("f", doc[t]));
                    sb.Append(doc[t]).Append(' ');
                }

                TopDocs hits  = s.Search(pq, NUM_DOCS);
                bool    found = false;
                for (int j = 0; j < hits.ScoreDocs.Length; j++)
                {
                    if (hits.ScoreDocs[j].Doc == docID)
                    {
                        found = true;
                        break;
                    }
                }

                Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
            }

            reader.Dispose();
            dir.Dispose();
        }
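The final assertion holds because a PhraseQuery with the default slop of 0 matches only terms at consecutive positions, and every queried run was copied verbatim from an indexed document. A condensed hedged sketch of that core behavior (an IndexSearcher named searcher and the example terms are illustrative, not taken from the test):

            PhraseQuery exact = new PhraseQuery();
            exact.Add(new Term("f", "quick"));
            exact.Add(new Term("f", "fox"));
            // slop 0 (the default) requires adjacency; a nonzero slop
            // allows the terms to sit up to that many positions apart
            exact.Slop = 1;
            TopDocs top = searcher.Search(exact, 10);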
Example #41
        public virtual void TestPhraseQueryInConjunctionScorer()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term("source", "marketing"));
            phraseQuery.Add(new Term("source", "info"));
            ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, phraseQuery, searcher);

            TermQuery    termQuery    = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, Occur.MUST);
            booleanQuery.Add(phraseQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, termQuery, searcher);

            reader.Dispose();

            writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE));
            doc    = new Documents.Document();
            doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            reader = writer.GetReader();
            writer.Dispose();

            searcher = NewSearcher(reader);

            termQuery   = new TermQuery(new Term("contents", "woo"));
            phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("contents", "map"));
            phraseQuery.Add(new Term("contents", "entry"));

            hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, Occur.MUST);
            booleanQuery.Add(phraseQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(phraseQuery, Occur.MUST);
            booleanQuery.Add(termQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, booleanQuery, searcher);

            reader.Dispose();
            directory.Dispose();
        }
Example #42
        public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            if ("Lucene3x".Equals(Codec.Default.Name))
            {
                throw new Exception("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            dir.CheckIndexOnClose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(scheduler)
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random(), TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = Environment.TickCount;
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
                }
                savedTerms = ts.SavedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Example #43
        public virtual void Test2BTerms_Mem()
        {
            if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                throw RuntimeException.Create("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            dir.CheckIndexOnDispose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(new ConcurrentMergeScheduler())
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(OpenMode.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random, TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;                                                          // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                savedTerms = ts.savedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms is null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Example #44
        public virtual void TestStressAdvance_Mem()
        {
            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory          dir   = NewDirectory();
                RandomIndexWriter  w     = new RandomIndexWriter(Random(), dir);
                HashSet <int>      aDocs = new HashSet <int>();
                Documents.Document doc   = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field idField = NewStringField("id", "", Field.Store.YES);
                doc.Add(idField);
                int num = AtLeast(4097);
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: numDocs=" + num);
                }
                for (int id = 0; id < num; id++)
                {
                    if (Random().Next(4) == 3)
                    {
                        f.StringValue = "a";
                        aDocs.Add(id);
                    }
                    else
                    {
                        f.StringValue = "b";
                    }
                    idField.StringValue = "" + id;
                    w.AddDocument(doc);
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: doc upto " + id);
                    }
                }

                w.ForceMerge(1);

                IList <int> aDocIDs = new List <int>();
                IList <int> bDocIDs = new List <int>();

                DirectoryReader r         = w.Reader;
                int[]           idToDocID = new int[r.MaxDoc];
                for (int docID = 0; docID < idToDocID.Length; docID++)
                {
                    int id = Convert.ToInt32(r.Document(docID).Get("id"));
                    if (aDocs.Contains(id))
                    {
                        aDocIDs.Add(docID);
                    }
                    else
                    {
                        bDocIDs.Add(docID);
                    }
                }
                TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null);

                DocsEnum de = null;
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                    }
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, aDocIDs);

                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, bDocIDs);
                }

                w.Dispose();
                r.Dispose();
                dir.Dispose();
            }
        }
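TestOne is defined elsewhere in this class; what it presumably stresses is the DocIdSetIterator advance contract over the "a" and "b" postings, which is why the test forces > 4096 documents into one segment. A hedged sketch of that contract (the helper name and shape here are assumptions, not the real TestOne):

        private static void CheckAdvance(DocsEnum de, int target)
        {
            // Advance(target) must return the first docID >= target,
            // or DocIdSetIterator.NO_MORE_DOCS when none remains
            int docID = de.Advance(target);
            if (docID != DocIdSetIterator.NO_MORE_DOCS && docID < target)
            {
                throw new InvalidOperationException("Advance(" + target + ") went backwards to " + docID);
            }
        }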
Example #45
        /*
         * Run one indexer and 2 searchers against a single index as
         * a stress test.
         */

        public virtual void RunTest(Directory directory)
        {
            TimedThread[] threads = new TimedThread[4];

            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7);

            ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3;
            IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random());

            // Establish a base index of 100 docs:
            for (int i = 0; i < 100; i++)
            {
                Documents.Document d = new Documents.Document();
                d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES));
                d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO));
                if ((i - 1) % 7 == 0)
                {
                    writer.Commit();
                }
                writer.AddDocument(d);
            }
            writer.Commit();

            IndexReader r = DirectoryReader.Open(directory);

            Assert.AreEqual(100, r.NumDocs);
            r.Dispose();

            IndexerThread indexerThread = new IndexerThread(writer, threads);

            threads[0] = indexerThread;
            indexerThread.Start();

            IndexerThread indexerThread2 = new IndexerThread(writer, threads);

            threads[1] = indexerThread2;
            indexerThread2.Start();

            SearcherThread searcherThread1 = new SearcherThread(directory, threads);

            threads[2] = searcherThread1;
            searcherThread1.Start();

            SearcherThread searcherThread2 = new SearcherThread(directory, threads);

            threads[3] = searcherThread2;
            searcherThread2.Start();

            indexerThread.Join();
            indexerThread2.Join();
            searcherThread1.Join();
            searcherThread2.Join();

            writer.Dispose();

            Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer");
            Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2");
            Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1");
            Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2");
            //System.out.println("    Writer: " + indexerThread.count + " iterations");
            //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
            //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
        }
Example #46
        public virtual void TestSurrogatesOrder()
        {
            Directory dir    = NewDirectory();
            var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            config.Codec = new PreFlexRWCodec();
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, config);

            int numField = TestUtil.NextInt32(Random, 2, 5);

            int uniqueTermCount = 0;

            int tc = 0;

            var fieldTerms = new JCG.List <Term>();

            for (int f = 0; f < numField; f++)
            {
                string field    = "f" + f;
                int    numTerms = AtLeast(200);

                ISet <string> uniqueTerms = new JCG.HashSet <string>();

                for (int i = 0; i < numTerms; i++)
                {
                    string term = GetRandomString(Random) + "_ " + (tc++);
                    uniqueTerms.Add(term);
                    fieldTerms.Add(new Term(field, term));
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewStringField(field, term, Field.Store.NO));
                    w.AddDocument(doc);
                }
                uniqueTermCount += uniqueTerms.Count;
            }

            IndexReader reader = w.GetReader();

            if (Verbose)
            {
                fieldTerms.Sort(termAsUTF16Comparer);

                Console.WriteLine("\nTEST: UTF16 order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            // sorts in code point order:
            fieldTerms.Sort();

            if (Verbose)
            {
                Console.WriteLine("\nTEST: codepoint order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            Term[] fieldTermsArray = fieldTerms.ToArray();

            //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);

            //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
            //Assert.IsNotNull(fields);

            DoTestStraightEnum(fieldTerms, reader, uniqueTermCount);
            DoTestSeekExists(Random, fieldTerms, reader);
            DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader);

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }
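The two sorts above exist because legacy (pre-4.0 PreFlex) indexes store terms in UTF-16 order, while BytesRef compares raw bytes, i.e. UTF-8/code point order, and supplementary characters rank differently under the two. A small hedged illustration of the difference, using the same legacy comparer this suite uses elsewhere:

            BytesRef supp = new BytesRef("\U00010000"); // supplementary char: a surrogate pair in UTF-16
            BytesRef bmp = new BytesRef("\uE000");      // BMP private-use char
            Console.WriteLine(supp.CompareTo(bmp) > 0); // True: UTF-8 / code point order
#pragma warning disable 612, 618
            Console.WriteLine(BytesRef.UTF8SortedAsUTF16Comparer.Compare(supp, bmp) < 0); // True: legacy UTF-16 order
#pragma warning restore 612, 618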
Example #47
        public virtual void TestRandom()
        {
            int num = AtLeast(2);

            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                Directory dir = NewDirectory();

                IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                // we can do this because we use NoMergePolicy (and don't merge to "nothing")
                w.KeepFullyDeletedSegments = true;

                IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >();
                ISet <int?>      deleted = new JCG.HashSet <int?>();
                IList <BytesRef> terms   = new List <BytesRef>();

                int numDocs            = TestUtil.NextInt32(Random, 1, 100 * RANDOM_MULTIPLIER);
                Documents.Document doc = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field id = NewStringField("id", "", Field.Store.NO);
                doc.Add(id);

                bool onlyUniqueTerms = Random.NextBoolean();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
                }
                ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();
                for (int i = 0; i < numDocs; i++)
                {
                    if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0)
                    {
                        // re-use existing term
                        BytesRef term = terms[Random.Next(terms.Count)];
                        docs[term].Add(i);
                        f.SetStringValue(term.Utf8ToString());
                    }
                    else
                    {
                        string   s    = TestUtil.RandomUnicodeString(Random, 10);
                        BytesRef term = new BytesRef(s);
                        if (!docs.TryGetValue(term, out IList <int?> docsTerm))
                        {
                            docs[term] = docsTerm = new List <int?>();
                        }
                        docsTerm.Add(i);
                        terms.Add(term);
                        uniqueTerms.Add(term);
                        f.SetStringValue(s);
                    }
                    id.SetStringValue("" + i);
                    w.AddDocument(doc);
                    if (Random.Next(4) == 1)
                    {
                        w.Commit();
                    }
                    if (i > 0 && Random.Next(20) == 1)
                    {
                        int delID = Random.Next(i);
                        deleted.Add(delID);
                        w.DeleteDocuments(new Term("id", "" + delID));
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: delete " + delID);
                        }
                    }
                }

                if (VERBOSE)
                {
                    List <BytesRef> termsList = new List <BytesRef>(uniqueTerms);
#pragma warning disable 612, 618
                    termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
                    Console.WriteLine("TEST: terms in UTF16 order:");
                    foreach (BytesRef b in termsList)
                    {
                        Console.WriteLine("  " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                        foreach (int docID in docs[b])
                        {
                            if (deleted.Contains(docID))
                            {
                                Console.WriteLine("    " + docID + " (deleted)");
                            }
                            else
                            {
                                Console.WriteLine("    " + docID);
                            }
                        }
                    }
                }

                IndexReader reader = w.GetReader();
                w.Dispose();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: reader=" + reader);
                }

                IBits liveDocs = MultiFields.GetLiveDocs(reader);
                foreach (int delDoc in deleted)
                {
                    Assert.IsFalse(liveDocs.Get(delDoc));
                }

                for (int i = 0; i < 100; i++)
                {
                    BytesRef term = terms[Random.Next(terms.Count)];
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);
                    }

                    DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE);
                    Assert.IsNotNull(docsEnum);

                    foreach (int docID in docs[term])
                    {
                        if (!deleted.Contains(docID))
                        {
                            Assert.AreEqual(docID, docsEnum.NextDoc());
                        }
                    }
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }

                reader.Dispose();
                dir.Dispose();
            }
        }
        public virtual void TestFlushExceptions()
        {
            #if NETCOREAPP2_0
            fail("LUCENENET TODO: Causing fatal crashes intermittently on NETCOREAPP2.0");
            #endif
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush failure = new FailOnlyOnFlush(this);
            directory.FailOn(failure);

            IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2));
            Document doc = new Document();
            Field idField = NewStringField("id", "", Field.Store.YES);
            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.SetStringValue(Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.hitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs);
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);
            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
        // StoredFieldVisitor callback: copies a stored binary field from the
        // index into the document being rebuilt by this visitor
        public override void BinaryField(FieldInfo fieldInfo, byte[] value)
        {
            doc.Add(new StoredField(fieldInfo.Name, value));
        }