private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.size() < numTerms)
     {
         randomTerms.add(TestUtil.RandomSimpleString(Random()));
     }
     terms = new List<string>(randomTerms);
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().nextInt(8) == 0))
         {
             iw.Commit();
         }
         if (Random().nextInt(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }
     reader = iw.Reader;
 }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true), Similarity, TimeZone);

            for (int i = 900; i < 1112; i++)
            {
                Document doc = new Document();
                string num = Regex.Replace(Regex.Replace(English.IntToEnglish(i), "[-]", " "), "[,]", "");
                doc.Add(NewTextField("numbers", num, Field.Store.NO));
                writer.AddDocument(doc);
            }

            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Commit();
            writer.Dispose();
        }
        public virtual void TestReuseDocsEnumNoReuse()
        {
            Directory dir = NewDirectory();
            Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);
            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext ctx in open.Leaves())
            {
                AtomicReader indexReader = (AtomicReader)ctx.Reader();
                Terms terms = indexReader.Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc());
                while ((iterator.Next()) != null)
                {
                    DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc()), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().toString(), ft));
                doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append(
                    Random().toString()).toString(), ft));
                doc.Add(new Field(_classFieldName, Random().toString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
         // TODO: we should make a new format with a clean header...
         // if (Random().nextInt(20) == 0) {
         //  riw.DeleteDocuments(new Term("id", Integer.toString(i)));
         // }
     }
     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
        public virtual void TestIndexing()
        {
            DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete");
            BaseDirectoryWrapper d = NewFSDirectory(tmpDir);

            // We want to "see" files removed if Lucene removed
            // them.  this is still worth running on Windows since
            // some files the IR opens and closes.
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
            }
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
            w.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30));

            w.Commit();
            ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)];
            long stopTime = Environment.TickCount + AtLeast(1000);
            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x] = new ThreadAnonymousInnerClassHelper(w, stopTime);
                indexThreads[x].Name = "Thread " + x;
                indexThreads[x].Start();
            }

            HashSet<string> allFiles = new HashSet<string>();

            DirectoryReader r = DirectoryReader.Open(d);
            while (Environment.TickCount < stopTime)
            {
                IndexCommit ic = r.IndexCommit;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: check files: " + ic.FileNames);
                }
                allFiles.AddAll(ic.FileNames);
                // Make sure no old files were removed
                foreach (string fileName in allFiles)
                {
                    Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
                }
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Thread.Sleep(1);
            }
            r.Dispose();

            foreach (ThreadClass t in indexThreads)
            {
                t.Join();
            }
            w.Dispose();
            d.Dispose();

            System.IO.Directory.Delete(tmpDir.FullName, true);
        }
Beispiel #7
0
        public virtual void TestSomeSegmentsMissing()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new SortedSetDocValuesFacetField("a", "foo1"));
            writer.AddDocument(config.Build(doc));
            writer.Commit();

            doc = new Document();
            writer.AddDocument(config.Build(doc));
            writer.Commit();

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo2"));
            writer.AddDocument(config.Build(doc));
            writer.Commit();

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);
            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            // Ask for top 10 labels for any dims that have counts:
            Assert.AreEqual("dim=a path=[] value=2 childCount=2\n  foo1 (1)\n  foo2 (1)\n", facets.GetTopChildren(10, "a").ToString());

            searcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public virtual void TestDeletePartiallyWrittenFilesIfAbort()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
            // disable CFS because this test checks file names
            iwConf.SetMergePolicy(NewLogMergePolicy(false));
            iwConf.SetUseCompoundFile(false);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            Document validDoc = new Document();

            validDoc.Add(new IntField("id", 0, Field.Store.YES));
            iw.AddDocument(validDoc);
            iw.Commit();

            // make sure that #writeField will fail to trigger an abort
            Document  invalidDoc = new Document();
            FieldType fieldType  = new FieldType();

            fieldType.Stored = true;
            invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

            try
            {
                iw.AddDocument(invalidDoc);
                iw.Commit();
            }
            finally
            {
                int counter = 0;
                foreach (string fileName in dir.ListAll())
                {
                    if (fileName.EndsWith(".fdt") || fileName.EndsWith(".fdx"))
                    {
                        counter++;
                    }
                }
                // Only one .fdt and one .fdx files must have been found
                Assert.AreEqual(2, counter);
                iw.Dispose();
                dir.Dispose();
            }
        }
Beispiel #9
0
        public virtual void TestBasic()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);

            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);

            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Dispose(writer, searcher.IndexReader, dir);
        }
        public virtual void TestFloatNorms()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            LineFileDocs docs = new LineFileDocs(Random());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                float nextFloat = (float)Random().NextDouble();
                // Cast to a double to get more precision output to the string.
                Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(FloatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(FloatTestField);
            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                float expected = Convert.ToSingle(document.Get(FloatTestField));
                Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
        public void TestWithContexts()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), true, true);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                //Document doc = docs.remove(f.utf8ToString());
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
                ISet <BytesRef>        oriCtxs    = new HashSet <BytesRef>();
                IEnumerable <BytesRef> contextSet = inputIterator.Contexts;
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXT_FIELD_NAME))
                {
                    oriCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(oriCtxs.size(), contextSet.Count());
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
Beispiel #12
0
        public void TestWithContext()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]    toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary      dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
            IInputEnumerator inputIterator = dictionary.GetEntryEnumerator();

            while (inputIterator.MoveNext())
            {
                string   field = inputIterator.Current.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(inputIterator.Current.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));

                // LUCENENET NOTE: This test was once failing because we used SCG.HashSet<T> whose
                // Equals() implementation does not check for set equality. As a result SortedInputEnumerator
                // had been modified to reverse the results to get the test to pass. However, using JCG.HashSet<T>
                // ensures that set equality (that is equality that doesn't care about order of items) is respected.
                // SortedInputEnumerator has also had the specific sorting removed.
                ISet <BytesRef> originalCtxs = new JCG.HashSet <BytesRef>();
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
                {
                    originalCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(originalCtxs, inputIterator.Contexts);
            }
            assertTrue(docs.Count == 0);
            ir.Dispose();
            dir.Dispose();
        }
Beispiel #13
0
        public virtual void TestNegativeScores()
        {
            // The Top*Collectors previously filtered out documents with <= scores. this
            // behavior has changed. this test checks that if PositiveOnlyScoresFilter
            // wraps one of these collectors, documents with <= 0 scores are indeed
            // filtered.

            int numPositiveScores = 0;

            for (int i = 0; i < Scores.Length; i++)
            {
                if (Scores[i] > 0)
                {
                    ++numPositiveScores;
                }
            }

            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            writer.Commit();
            IndexReader ir = writer.GetReader();

            writer.Dispose();
            IndexSearcher searcher          = NewSearcher(ir);
            Weight        fake              = (new TermQuery(new Term("fake", "weight"))).CreateWeight(searcher);
            Scorer        s                 = new SimpleScorer(fake);
            TopDocsCollector <ScoreDoc> tdc = TopScoreDocCollector.Create(Scores.Length, true);
            ICollector c = new PositiveScoresOnlyCollector(tdc);

            c.SetScorer(s);
            while (s.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                c.Collect(0);
            }
            TopDocs td = tdc.GetTopDocs();

            ScoreDoc[] sd = td.ScoreDocs;
            Assert.AreEqual(numPositiveScores, td.TotalHits);
            for (int i = 0; i < sd.Length; i++)
            {
                Assert.IsTrue(sd[i].Score > 0, "only positive scores should return: " + sd[i].Score);
            }
            ir.Dispose();
            directory.Dispose();
        }
Beispiel #14
0
        public override void SetUp()
        {
            base.SetUp();

            // initialize directory
            Directory = NewDirectory();
            Writer = new RandomIndexWriter(Random(), Directory);

            // write document
            Document doc = new Document();
            doc.Add(NewTextField("UUID", "1", Field.Store.YES));
            Writer.AddDocument(doc);
            Writer.Commit();
        }
        public virtual void TestBasic()
        {

            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);
            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);
            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Close(writer, searcher.IndexReader, dir);
        }
Beispiel #16
0
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory    dir      = NewDirectory();
            Codec        cp       = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random);
            writer.Commit();

            DirectoryReader             firstReader  = DirectoryReader.Open(dir);
            DirectoryReader             secondReader = DirectoryReader.Open(dir);
            IList <AtomicReaderContext> leaves       = firstReader.Leaves;
            IList <AtomicReaderContext> leaves2      = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetEnumerator();
                IDictionary <DocsEnum, bool?> enums = new JCG.Dictionary <DocsEnum, bool?>(IdentityEqualityComparer <DocsEnum> .Default);
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.GetEnumerator();
                DocsEnum docs = null;
                BytesRef term = null;
                while (iterator.MoveNext())
                {
                    term        = iterator.Term;
                    docs        = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                iterator = terms.GetEnumerator();
                enums.Clear();
                docs = null;
                while (iterator.MoveNext())
                {
                    term        = iterator.Term;
                    docs        = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);
            }
            IOUtils.Dispose(writer, firstReader, secondReader, dir);
        }
Beispiel #17
0
        public virtual void TestReuseDocsEnumSameBitsOrNull()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetIterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(open.MaxDoc);
                DocsEnum    docs = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(1, enums.Count);
                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(new MatchNoBits(open.MaxDoc), docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(1, enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
Beispiel #18
0
        public override void SetUp()
        {
            base.SetUp();
            dir    = NewDirectory();
            writer = new RandomIndexWriter(Random, dir);
            int numDocs = AtLeast(100);

            for (int i = 0; i < numDocs; i++)
            {
                writer.AddDocument(new Document());
                if (Rarely())
                {
                    writer.Commit();
                }
            }
        }
Beispiel #19
0
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory    dir      = NewDirectory();
            Codec        cp       = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader             firstReader  = DirectoryReader.Open(dir);
            DirectoryReader             secondReader = DirectoryReader.Open(dir);
            IList <AtomicReaderContext> leaves       = firstReader.Leaves;
            IList <AtomicReaderContext> leaves2      = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.Iterator(null);
                DocsEnum docs = null;
                BytesRef term = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);

                iterator = terms.Iterator(null);
                enums.Clear();
                docs = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, firstReader, secondReader, dir);
        }
Beispiel #20
0
        public void TestWithoutPayload()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <string> invalidDocTerms       = res.Key;

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? weightField.GetInt64ValueOrDefault() : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }


            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestWithContext()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
                ISet <BytesRef> originalCtxs = new HashSet <BytesRef>();
                foreach (IIndexableField ctxf in doc.GetFields(CONTEXTS_FIELD_NAME))
                {
                    originalCtxs.add(ctxf.GetBinaryValue());
                }
                assertEquals(originalCtxs, inputIterator.Contexts);
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
Beispiel #22
0
        /** Creates an index for sorting. */
        public void CreateIndex(Directory dir, int numDocs, Random random)
        {
            IList <int> ids = new List <int>();

            for (int i = 0; i < numDocs; i++)
            {
                ids.Add(i * 10);
            }
            // shuffle them for indexing
            // LUCENENET NOTE: Using LINQ, so we need to reassign the variable with the result
            ids = CollectionsHelper.Shuffle(ids);

            if (VERBOSE)
            {
                Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(ids.ToArray()));
            }

            PositionsTokenStream positions = new PositionsTokenStream();
            IndexWriterConfig    conf      = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetMaxBufferedDocs(4);                               // create some segments
            conf.SetSimilarity(new NormsSimilarity(conf.Similarity)); // for testing norms field
            using (RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf))
            {
                writer.RandomForceMerge = (false);
                foreach (int id in ids)
                {
                    writer.AddDocument(Doc(id, positions));
                }
                // delete some documents
                writer.Commit();
                foreach (int id in ids)
                {
                    if (random.NextDouble() < 0.2)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("delete doc_id " + id);
                        }
                        writer.DeleteDocuments(new Term(ID_FIELD, id.ToString()));
                    }
                }
            }
        }
Beispiel #23
0
        public override void SetUp()
        {
            base.SetUp();
            dir    = NewDirectory();
            writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            int numDocs = AtLeast(100);
            for (int i = 0; i < numDocs; i++)
            {
                writer.AddDocument(new Document());
                if (Rarely())
                {
                    writer.Commit();
                }
            }
        }
        public virtual void TestBinary()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            BytesRef @ref = new BytesRef();
            Field field = new BinaryDocValuesField("bytes", @ref);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes");
            BinaryDocValues single = merged.GetBinaryDocValues("bytes");
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int i = 0; i < numDocs; i++)
            {
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
        public void TestBasic()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), true, false);
            IDictionary<string, Document> docs = res.Value;
            List<String> invalidDocTerms = res.Key;
            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();
            IndexReader ir = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;
            BytesRef f;
            while ((f = inputIterator.Next()) != null)
            {
                string field = f.Utf8ToString();
                Document doc = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                //Document doc = docs.Remove(f.Utf8ToString());
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
                assertTrue(inputIterator.Payload.Equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestEmptyReader()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            // Make sure the index is created?
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            writer.Commit();
            writer.Dispose();
            IndexReader ir = DirectoryReader.Open(dir);
            IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;

            assertNull(inputIterator.Next());
            assertEquals(inputIterator.Weight, 0);
            assertNull(inputIterator.Payload);

            ir.Dispose();
            dir.Dispose();
        }
        public void TestBasic()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                //Document doc = docs.remove(f.utf8ToString());
                long w1 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_1).NumericValue);
                long w2 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_2).NumericValue);
                long w3 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_3).NumericValue);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestEvilSearcherFactory()
        {
            Random            random = Random;
            Directory         dir    = NewDirectory();
            RandomIndexWriter w      = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                random, dir);

            w.Commit();

            IndexReader other = DirectoryReader.Open(dir);

            SearcherFactory theEvilOne = new SearcherFactoryAnonymousInnerClassHelper3(this, other);

            try
            {
                new SearcherManager(dir, theEvilOne);
            }
#pragma warning disable 168
            catch (InvalidOperationException ise)
#pragma warning restore 168
            {
                // expected
            }
            try
            {
                new SearcherManager(w.IndexWriter, random.NextBoolean(), theEvilOne);
            }
#pragma warning disable 168
            catch (InvalidOperationException ise)
#pragma warning restore 168
            {
                // expected
            }
            w.Dispose();
            other.Dispose();
            dir.Dispose();
        }
Beispiel #29
0
        public void TestWithoutPayload()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random, dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));

            foreach (Document doc in docs.Values)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            ValueSource[]  toAdd         = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2), new Int64FieldSource(WEIGHT_FIELD_NAME_3) };
            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd));
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs[field];
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                long w3 = doc.GetField(WEIGHT_FIELD_NAME_3).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, (w1 + w2 + w3));
                assertEquals(inputIterator.Payload, null);
            }
            assertTrue(docs.Count == 0);
            ir.Dispose();
            dir.Dispose();
        }
        public void TestEmptyReader()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            // Make sure the index is created?
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);

            writer.Commit();
            writer.Dispose();
            IndexReader    ir            = DirectoryReader.Open(dir);
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();

            assertNull(inputIterator.Next());
            assertEquals(inputIterator.Weight, 0);
            assertNull(inputIterator.Payload);

            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestNumerics()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            Field field = new NumericDocValuesField("numbers", 0);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                field.LongValue = Random().NextLong();
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers");
            NumericDocValues single = merged.GetNumericDocValues("numbers");
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true));

            for (int i = 900; i < 1112; i++)
            {
                Document doc = new Document();
                string   num = Regex.Replace(Regex.Replace(English.Int32ToEnglish(i), "[-]", " "), "[,]", "");
                doc.Add(NewTextField("numbers", num, Field.Store.NO));
                writer.AddDocument(doc);
            }

            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Commit();
            writer.Dispose();
        }
        public virtual void TestEvilSearcherFactory()
        {
            Random            random = Random();
            Directory         dir    = NewDirectory();
            RandomIndexWriter w      = new RandomIndexWriter(random, dir, Similarity, TimeZone);

            w.Commit();

            IndexReader other = DirectoryReader.Open(dir);

            SearcherFactory theEvilOne = new SearcherFactoryAnonymousInnerClassHelper3(this, other);

            try
            {
                new SearcherManager(dir, theEvilOne);
            }
#pragma warning disable 168
            catch (InvalidOperationException ise)
#pragma warning restore 168
            {
                // expected
            }
            try
            {
                new SearcherManager(w.w, random.NextBoolean(), theEvilOne);
            }
#pragma warning disable 168
            catch (InvalidOperationException ise)
#pragma warning restore 168
            {
                // expected
            }
            w.Dispose();
            other.Dispose();
            dir.Dispose();
        }
Beispiel #34
0
        public virtual void TestSlowCompositeReaderWrapper()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new SortedSetDocValuesFacetField("a", "foo1"));
            writer.AddDocument(config.Build(doc));

            writer.Commit();

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo2"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(writer.Reader));

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = new SortedSetDocValuesFacetCounts(state, c);

            // Ask for top 10 labels for any dims that have counts:
            Assert.AreEqual("dim=a path=[] value=2 childCount=2\n  foo1 (1)\n  foo2 (1)\n", facets.GetTopChildren(10, "a").ToString());

            IOUtils.Close(writer, searcher.IndexReader, dir);
        }
Beispiel #35
0
        public virtual void TestEmptyBucketWithMoreDocs()
        {
            // this test checks the logic of nextDoc() when all sub scorers have docs
            // beyond the first bucket (for example). Currently, the code relies on the
            // 'more' variable to work properly, and this test ensures that if the logic
            // changes, we have a test to back it up.

            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            writer.Commit();
            IndexReader ir = writer.GetReader();

            writer.Dispose();
            IndexSearcher searcher = NewSearcher(ir);
            BooleanWeight weight   = (BooleanWeight)(new BooleanQuery()).CreateWeight(searcher);

            BulkScorer[] scorers = new BulkScorer[] {
                new BulkScorerAnonymousInnerClassHelper()
            };

            BooleanScorer bs = new BooleanScorer(weight, false, 1, scorers, Collections.EmptyList <BulkScorer>(), scorers.Length);

            IList <int> hits = new List <int>();

            bs.Score(new CollectorAnonymousInnerClassHelper(this, hits));

            Assert.AreEqual(1, hits.Count, "should have only 1 hit");
            Assert.AreEqual(3000, (int)hits[0], "hit should have been docID=3000");
            ir.Dispose();
            directory.Dispose();
        }
Beispiel #36
0
        public virtual void TestEvilSearcherFactory()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir);

            w.Commit();

            IndexReader other = DirectoryReader.Open(dir);

            SearcherFactory theEvilOne = new SearcherFactoryAnonymousClass2(this, other);

            try
            {
                new SearcherManager(w.IndexWriter, false, theEvilOne);
                fail("didn't hit expected exception");
            }
            catch (Exception ise) when(ise.IsIllegalStateException())
            {
                // expected
            }
            w.Dispose();
            other.Dispose();
            dir.Dispose();
        }
        public virtual void TestEvilSearcherFactory()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir);

            w.Commit();

            IndexReader other = DirectoryReader.Open(dir);

            SearcherFactory theEvilOne = new SearcherFactoryAnonymousInnerClassHelper2(this, other);

            try
            {
                new SearcherManager(w.w, false, theEvilOne);
                Assert.Fail("didn't hit expected exception");
            }
            catch (InvalidOperationException ise)
            {
                // expected
            }
            w.Dispose();
            other.Dispose();
            dir.Dispose();
        }
        public virtual void TestEvilSearcherFactory()
        {
            Random            random = Random;
            Directory         dir    = NewDirectory();
            RandomIndexWriter w      = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                random, dir);

            w.Commit();

            IndexReader other = DirectoryReader.Open(dir);

            SearcherFactory theEvilOne = new SearcherFactoryAnonymousClass3(this, other);

            try
            {
                new SearcherManager(dir, theEvilOne);
            }
            catch (Exception ise) when(ise.IsIllegalStateException())
            {
                // expected
            }
            try
            {
                new SearcherManager(w.IndexWriter, random.NextBoolean(), theEvilOne);
            }
            catch (Exception ise) when(ise.IsIllegalStateException())
            {
                // expected
            }
            w.Dispose();
            other.Dispose();
            dir.Dispose();
        }
Beispiel #39
0
        public void TestInsideBooleanQuery()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "0", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
                                                       new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);

            BooleanQuery bq = new BooleanQuery();

            bq.Add(joinQuery, Occur.SHOULD);
            bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD);

            indexSearcher.Search(bq, new CollectorAnonymousClass());

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
 public virtual void TestCommitThreadSafety()
 {
     const int NUM_THREADS = 5;
     const double RUN_SEC = 0.5;
     Directory dir = NewDirectory();
     RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     TestUtil.ReduceOpenFiles(w.w);
     w.Commit();
     AtomicBoolean failed = new AtomicBoolean();
     ThreadClass[] threads = new ThreadClass[NUM_THREADS];
     long endTime = DateTime.Now.Millisecond + ((long)(RUN_SEC * 1000));
     for (int i = 0; i < NUM_THREADS; i++)
     {
         int finalI = i;
         threads[i] = new ThreadAnonymousInnerClassHelper(this, dir, w, failed, endTime, finalI);
         threads[i].Start();
     }
     for (int i = 0; i < NUM_THREADS; i++)
     {
         threads[i].Join();
     }
     Assert.IsFalse(failed.Get());
     w.Dispose();
     dir.Dispose();
 }
        private void DoTest(FieldInfo.DocValuesType_e type)
        {
            Directory d = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            int nDocs = AtLeast(50);
            Field id = new NumericDocValuesField("id", 0);
            Field f;
            switch (type)
            {
                case FieldInfo.DocValuesType_e.BINARY:
                    f = new BinaryDocValuesField("dv", new BytesRef());
                    break;
                case FieldInfo.DocValuesType_e.SORTED:
                    f = new SortedDocValuesField("dv", new BytesRef());
                    break;
                case FieldInfo.DocValuesType_e.NUMERIC:
                    f = new NumericDocValuesField("dv", 0);
                    break;
                default:
                    throw new InvalidOperationException();
            }
            Document document = new Document();
            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);
            for (int i = 0; i < nDocs; ++i)
            {
                id.LongValue = i;
                switch (type)
                {
                    case FieldInfo.DocValuesType_e.SORTED:
                    case FieldInfo.DocValuesType_e.BINARY:
                        do
                        {
                            vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                        } while (((string)vals[i]).Length == 0);
                        f.BytesValue = new BytesRef((string)vals[i]);
                        break;
                    case FieldInfo.DocValuesType_e.NUMERIC:
                        int bitsPerValue = Random().NextIntBetween(1, 31); // keep it an int
                        vals[i] = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                        f.LongValue = (long) vals[i];
                        break;
                }
                iw.AddDocument(document);
                if (Random().NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);
            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
                ValueSource vs;
                switch (type)
                {
                    case FieldInfo.DocValuesType_e.BINARY:
                    case FieldInfo.DocValuesType_e.SORTED:
                        vs = new BytesRefFieldSource("dv");
                        break;
                    case FieldInfo.DocValuesType_e.NUMERIC:
                        vs = new LongFieldSource("dv");
                        break;
                    default:
                        throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef bytes = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is LongFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.IntVal(i)];
                    switch (type)
                    {
                        case FieldInfo.DocValuesType_e.SORTED:
                            values.OrdVal(i); // no exception
                            assertTrue(values.NumOrd() >= 1);
                            goto case FieldInfo.DocValuesType_e.BINARY;
                        case FieldInfo.DocValuesType_e.BINARY:
                            assertEquals(expected, values.ObjectVal(i));
                            assertEquals(expected, values.StrVal(i));
                            assertEquals(expected, values.ObjectVal(i));
                            assertEquals(expected, values.StrVal(i));
                            assertTrue(values.BytesVal(i, bytes));
                            assertEquals(new BytesRef((string)expected), bytes);
                            break;
                        case FieldInfo.DocValuesType_e.NUMERIC:
                            assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                            break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
 // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
 public virtual void TestMerge()
 {
     RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
     int numDocs = AtLeast(100);
     int numDeletes = Random().Next(numDocs);
     HashSet<int?> deletes = new HashSet<int?>();
     while (deletes.Count < numDeletes)
     {
         deletes.Add(Random().Next(numDocs));
     }
     foreach (Options options in ValidOptions())
     {
         RandomDocument[] docs = new RandomDocument[numDocs];
         for (int i = 0; i < numDocs; ++i)
         {
             docs[i] = docFactory.NewDocument(TestUtil.NextInt(Random(), 1, 3), AtLeast(10), options);
         }
         Directory dir = NewDirectory();
         RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
         for (int i = 0; i < numDocs; ++i)
         {
             writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
             if (Rarely())
             {
                 writer.Commit();
             }
         }
         foreach (int delete in deletes)
         {
             writer.DeleteDocuments(new Term("id", "" + delete));
         }
         // merge with deletes
         writer.ForceMerge(1);
         IndexReader reader = writer.Reader;
         for (int i = 0; i < numDocs; ++i)
         {
             if (!deletes.Contains(i))
             {
                 int docID = DocID(reader, "" + i);
                 AssertEquals(docs[i], reader.GetTermVectors(docID));
             }
         }
         reader.Dispose();
         writer.Dispose();
         dir.Dispose();
     }
 }
        public virtual void TestLocksBlock()
        {
            Directory src = NewDirectory();
            RandomIndexWriter w1 = new RandomIndexWriter(Random(), src, Similarity, TimeZone);
            w1.AddDocument(new Document());
            w1.Commit();

            Directory dest = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetWriteLockTimeout(1);
            RandomIndexWriter w2 = new RandomIndexWriter(Random(), dest, iwc);

            try
            {
                w2.AddIndexes(src);
                Assert.Fail("did not hit expected exception");
            }
            catch (LockObtainFailedException lofe)
            {
                // expected
            }

            IOUtils.Close(w1, w2, src, dest);
        }
        public void TestEmptyDocs()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            // make sure that the fact that documents might be empty is not a problem
            Document emptyDoc = new Document();
            int numDocs = Random().NextBoolean() ? 1 : AtLeast(1000);
            for (int i = 0; i < numDocs; ++i)
            {
                iw.AddDocument(emptyDoc);
            }
            iw.Commit();
            DirectoryReader rd = DirectoryReader.Open(dir);
            for (int i = 0; i < numDocs; ++i)
            {
                Document doc = rd.Document(i);
                Assert.IsNotNull(doc);
                Assert.IsTrue(doc.Fields.Count <= 0);
            }
            rd.Dispose();

            iw.Dispose();
            dir.Dispose();
        }
Beispiel #45
0
        public override void SetUp()
        {
            base.SetUp();

            AllSortFields = new List<SortField>(Arrays.AsList(new SortField[] { new SortField("byte", SortField.Type_e.BYTE, false), new SortField("short", SortField.Type_e.SHORT, false), new SortField("int", SortField.Type_e.INT, false), new SortField("long", SortField.Type_e.LONG, false), new SortField("float", SortField.Type_e.FLOAT, false), new SortField("double", SortField.Type_e.DOUBLE, false), new SortField("bytes", SortField.Type_e.STRING, false), new SortField("bytesval", SortField.Type_e.STRING_VAL, false), new SortField("byte", SortField.Type_e.BYTE, true), new SortField("short", SortField.Type_e.SHORT, true), new SortField("int", SortField.Type_e.INT, true), new SortField("long", SortField.Type_e.LONG, true), new SortField("float", SortField.Type_e.FLOAT, true), new SortField("double", SortField.Type_e.DOUBLE, true), new SortField("bytes", SortField.Type_e.STRING, true), new SortField("bytesval", SortField.Type_e.STRING_VAL, true), SortField.FIELD_SCORE, SortField.FIELD_DOC }));

            if (SupportsDocValues)
            {
                AllSortFields.AddRange(Arrays.AsList(new SortField[] { new SortField("intdocvalues", SortField.Type_e.INT, false), new SortField("floatdocvalues", SortField.Type_e.FLOAT, false), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, false), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, false), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, false), new SortField("intdocvalues", SortField.Type_e.INT, true), new SortField("floatdocvalues", SortField.Type_e.FLOAT, true), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, true), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, true), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, true) }));
            }

            // Also test missing first / last for the "string" sorts:
            foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" })
            {
                for (int rev = 0; rev < 2; rev++)
                {
                    bool reversed = rev == 0;
                    SortField sf = new SortField(field, SortField.Type_e.STRING, reversed);
                    sf.MissingValue = SortField.STRING_FIRST;
                    AllSortFields.Add(sf);

                    sf = new SortField(field, SortField.Type_e.STRING, reversed);
                    sf.MissingValue = SortField.STRING_LAST;
                    AllSortFields.Add(sf);
                }
            }

            int limit = AllSortFields.Count;
            for (int i = 0; i < limit; i++)
            {
                SortField sf = AllSortFields[i];
                if (sf.Type == SortField.Type_e.INT)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.INT, sf.Reverse);
                    sf2.MissingValue = Random().Next();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.LONG)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.LONG, sf.Reverse);
                    sf2.MissingValue = Random().NextLong();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.FLOAT)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.FLOAT, sf.Reverse);
                    sf2.MissingValue = (float)Random().NextDouble();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.DOUBLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.DOUBLE, sf.Reverse);
                    sf2.MissingValue = Random().NextDouble();
                    AllSortFields.Add(sf2);
                }
            }

            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
            int numDocs = AtLeast(200);
            for (int i = 0; i < numDocs; i++)
            {
                IList<Field> fields = new List<Field>();
                fields.Add(NewTextField("english", English.IntToEnglish(i), Field.Store.NO));
                fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
                fields.Add(NewStringField("byte", "" + ((sbyte)Random().Next()), Field.Store.NO));
                fields.Add(NewStringField("short", "" + ((short)Random().Next()), Field.Store.NO));
                fields.Add(new IntField("int", Random().Next(), Field.Store.NO));
                fields.Add(new LongField("long", Random().NextLong(), Field.Store.NO));

                fields.Add(new FloatField("float", (float)Random().NextDouble(), Field.Store.NO));
                fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO));
                fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
                fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
                fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO));

                if (SupportsDocValues)
                {
                    fields.Add(new NumericDocValuesField("intdocvalues", Random().Next()));
                    fields.Add(new FloatDocValuesField("floatdocvalues", (float)Random().NextDouble()));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                    fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                }
                Document document = new Document();
                document.Add(new StoredField("id", "" + i));
                if (VERBOSE)
                {
                    Console.WriteLine("  add doc id=" + i);
                }
                foreach (Field field in fields)
                {
                    // So we are sometimes missing that field:
                    if (Random().Next(5) != 4)
                    {
                        document.Add(field);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + field);
                        }
                    }
                }

                iw.AddDocument(document);

                if (Random().Next(50) == 17)
                {
                    iw.Commit();
                }
            }
            Reader = iw.Reader;
            iw.Dispose();
            Searcher = NewSearcher(Reader);
            if (VERBOSE)
            {
                Console.WriteLine("  searcher=" + Searcher);
            }
        }
        public void TestBigDocuments()
        {
            // "big" as "much bigger than the chunk size"
            // for this test we force a FS dir
            // we can't just use newFSDirectory, because this test doesn't really index anything.
            // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
            Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            Document emptyDoc = new Document(); // emptyDoc
            Document bigDoc1 = new Document(); // lot of small fields
            Document bigDoc2 = new Document(); // 1 very big field

            Field idField = new StringField("id", "", Field.Store.NO);
            emptyDoc.Add(idField);
            bigDoc1.Add(idField);
            bigDoc2.Add(idField);

            FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
            onlyStored.Indexed = false;

            Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
            int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
            for (int i = 0; i < numFields; ++i)
            {
                bigDoc1.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
            bigDoc2.Add(bigField);

            int numDocs = AtLeast(5);
            Document[] docs = new Document[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
            }
            for (int i = 0; i < numDocs; ++i)
            {
                idField.StringValue = "" + i;
                iw.AddDocument(docs[i]);
                if (Random().Next(numDocs) == 0)
                {
                    iw.Commit();
                }
            }
            iw.Commit();
            iw.ForceMerge(1); // look at what happens when big docs are merged
            DirectoryReader rd = DirectoryReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(rd);
            for (int i = 0; i < numDocs; ++i)
            {
                Query query = new TermQuery(new Term("id", "" + i));
                TopDocs topDocs = searcher.Search(query, 1);
                Assert.AreEqual(1, topDocs.TotalHits, "" + i);
                Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
                Assert.IsNotNull(doc);
                IndexableField[] fieldValues = doc.GetFields("fld");
                Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
                if (fieldValues.Length > 0)
                {
                    Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue());
                }
            }
            rd.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public void TestConcurrentReads()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            // make sure the readers are properly cloned
            Document doc = new Document();
            Field field = new StringField("fld", "", Field.Store.YES);
            doc.Add(field);
            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; ++i)
            {
                field.StringValue = "" + i;
                iw.AddDocument(doc);
            }
            iw.Commit();

            DirectoryReader rd = DirectoryReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(rd);
            int concurrentReads = AtLeast(5);
            int readsPerThread = AtLeast(50);
            IList<ThreadClass> readThreads = new List<ThreadClass>();
            AtomicReference<Exception> ex = new AtomicReference<Exception>();
            for (int i = 0; i < concurrentReads; ++i)
            {
                readThreads.Add(new ThreadAnonymousInnerClassHelper(numDocs, rd, searcher, readsPerThread, ex, i));
            }
            foreach (ThreadClass thread in readThreads)
            {
                thread.Start();
            }
            foreach (ThreadClass thread in readThreads)
            {
                thread.Join();
            }
            rd.Dispose();
            if (ex.Value != null)
            {
                throw ex.Value;
            }

            iw.Dispose();
            dir.Dispose();
        }
Beispiel #48
0
        public void TestSimpleWithScoring()
        {
            const string idField = "id";
            const string toField = "movieId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "A random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for movie via subtitle
            Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
                                                       new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            // Score mode max.
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Max);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            // Score mode total
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Total);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            //Score mode avg
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Avg);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Beispiel #49
0
        public void TestNestedDocScoringWithDeletes()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = false;

            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("c", 1999));
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);

            w.Commit();
            IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);

            TopDocs topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            w.DeleteDocuments(new Term("skill", "java"));
            w.Dispose();
            s = NewSearcher(DirectoryReader.Open(dir));

            topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            dir.Dispose();
        }
 public void TestBulkMergeWithDeletes()
 {
     int numDocs = AtLeast(200);
     Directory dir = NewDirectory();
     RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = new Document();
         doc.Add(new StringField("id", Convert.ToString(i), Field.Store.YES));
         doc.Add(new StoredField("f", TestUtil.RandomSimpleString(Random())));
         w.AddDocument(doc);
     }
     int deleteCount = TestUtil.NextInt(Random(), 5, numDocs);
     for (int i = 0; i < deleteCount; ++i)
     {
         int id = Random().Next(numDocs);
         w.DeleteDocuments(new Term("id", Convert.ToString(id)));
     }
     w.Commit();
     w.Dispose();
     w = new RandomIndexWriter(Random(), dir);
     w.ForceMerge(TestUtil.NextInt(Random(), 1, 3));
     w.Commit();
     w.Dispose();
     TestUtil.CheckIndex(dir);
     dir.Dispose();
 }
Beispiel #51
0
        public void TestSimple()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
                                                       indexSearcher, ScoreMode.None);

            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(4, result.ScoreDocs[0].Doc);
            assertEquals(5, result.ScoreDocs[1].Doc);

            joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(1, result.ScoreDocs[0].Doc);
            assertEquals(2, result.ScoreDocs[1].Doc);

            // Search for offer
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(1, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public void TestTotalGroupCount()
        {

            string groupField = "author";
            FieldType customType = new FieldType();
            customType.Stored = true;

            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                    new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

            // 0
            Document doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "random text", Field.Store.YES));
            doc.Add(new Field("id", "1", customType));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.YES));
            doc.Add(new Field("id", "2", customType));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.YES));
            doc.Add(new Field("id", "3", customType));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV);
            doc.Add(new TextField("content", "some random text", Field.Store.YES));
            doc.Add(new Field("id", "4", customType));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "some more random text", Field.Store.YES));
            doc.Add(new Field("id", "5", customType));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "random blob", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.Reader);
            w.Dispose();

            IAbstractAllGroupsCollector<object> allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupsCollector as Collector);
            assertEquals(4, allGroupsCollector.GroupCount);

            allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupsCollector as Collector);
            assertEquals(3, allGroupsCollector.GroupCount);

            allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
            // so this cast is not necessary. Consider eliminating the Collector abstract class.
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupsCollector as Collector);
            assertEquals(2, allGroupsCollector.GroupCount);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public virtual void TestTermUTF16SortOrder()
        {
            Random rnd = Random();
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(rnd, dir, Similarity, TimeZone);
            Document d = new Document();
            // Single segment
            Field f = NewStringField("f", "", Field.Store.NO);
            d.Add(f);
            char[] chars = new char[2];
            HashSet<string> allTerms = new HashSet<string>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s;
                if (rnd.NextBoolean())
                {
                    // Single char
                    if (rnd.NextBoolean())
                    {
                        // Above surrogates
                        chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff);
                    }
                    else
                    {
                        // Below surrogates
                        chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1);
                    }
                    s = new string(chars, 0, 1);
                }
                else
                {
                    // Surrogate pair
                    chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END);
                    Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END);
                    chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END);
                    s = new string(chars, 0, 2);
                }
                allTerms.Add(s);
                f.StringValue = s;

                writer.AddDocument(d);

                if ((1 + i) % 42 == 0)
                {
                    writer.Commit();
                }
            }

            IndexReader r = writer.Reader;

            // Test each sub-segment
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                CheckTermsOrder(ctx.Reader, allTerms, false);
            }
            CheckTermsOrder(r, allTerms, true);

            // Test multi segment
            r.Dispose();

            writer.ForceMerge(1);

            // Test single segment
            r = writer.Reader;
            CheckTermsOrder(r, allTerms, true);
            r.Dispose();

            writer.Dispose();
            dir.Dispose();
        }
Beispiel #54
0
        // TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs

        public virtual void BuildIndex(Directory dir)
        {
            Random random = Random();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity provider = new MySimProvider(this);
            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                Document doc = docs.NextDoc();
                int boost = Random().Next(255);
                Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
                f.Boost = boost;
                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(ByteTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            docs.Dispose();
        }
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <String> invalidDocTerms       = res.Key;
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                IIndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                assertTrue(toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertEquals(ir.NumDocs, docs.size());
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestReadSkip()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            FieldType ft = new FieldType();
            ft.Stored = true;
            ft.Freeze();

            string @string = TestUtil.RandomSimpleString(Random(), 50);
            var bytes = @string.GetBytes(IOUtils.CHARSET_UTF_8);
            long l = Random().NextBoolean() ? Random().Next(42) : Random().NextLong();
            int i = Random().NextBoolean() ? Random().Next(42) : Random().Next();
            float f = Random().NextFloat();
            double d = Random().NextDouble();

            IList<Field> fields = Arrays.AsList(new Field("bytes", bytes, ft), new Field("string", @string, ft), new LongField("long", l, Field.Store.YES), new IntField("int", i, Field.Store.YES), new FloatField("float", f, Field.Store.YES), new DoubleField("double", d, Field.Store.YES)
               );

            for (int k = 0; k < 100; ++k)
            {
                Document doc = new Document();
                foreach (Field fld in fields)
                {
                    doc.Add(fld);
                }
                iw.w.AddDocument(doc);
            }
            iw.Commit();

            DirectoryReader reader = DirectoryReader.Open(dir);
            int docID = Random().Next(100);
            foreach (Field fld in fields)
            {
                string fldName = fld.Name();
                Document sDoc = reader.Document(docID, Collections.Singleton(fldName));
                IndexableField sField = sDoc.GetField(fldName);
                if (typeof(Field) == fld.GetType())
                {
                    Assert.AreEqual(fld.BinaryValue(), sField.BinaryValue());
                    Assert.AreEqual(fld.StringValue, sField.StringValue);
                }
                else
                {
                    Assert.AreEqual(fld.NumericValue, sField.NumericValue);
                }
            }
            reader.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Beispiel #57
0
        public void TestTotalGroupCount()
        {
            string    groupField = "author";
            FieldType customType = new FieldType();

            customType.IsStored = true;

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);

            // 0
            Document doc = new Document();

            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "random text", Field.Store.YES));
            doc.Add(new Field("id", "1", customType));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.YES));
            doc.Add(new Field("id", "2", customType));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.YES));
            doc.Add(new Field("id", "3", customType));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV);
            doc.Add(new TextField("content", "some random text", Field.Store.YES));
            doc.Add(new Field("id", "4", customType));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "some more random text", Field.Store.YES));
            doc.Add(new Field("id", "5", customType));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "random blob", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            w.Dispose();

            IAbstractAllGroupsCollector <object> allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupsCollector);
            assertEquals(4, allGroupsCollector.GroupCount);

            allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupsCollector);
            assertEquals(3, allGroupsCollector.GroupCount);

            allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupsCollector);
            assertEquals(2, allGroupsCollector.GroupCount);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                int numValues = Random().Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2))));
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");
            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.Document = i;
                    List<long?> expectedList = new List<long?>();
                    long ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.Document = i;
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
 public virtual void TestListAll()
 {
     Directory dir = NewDirectory();
     // riw should sometimes create docvalues fields, etc
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     doc.Add(idField);
     doc.Add(bodyField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
     }
     riw.Dispose();
     CheckFiles(dir);
     dir.Dispose();
 }
        public virtual void TestDocsWithField()
        {
            AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                if (Random().Next(4) >= 0)
                {
                    doc.Add(new NumericDocValuesField("numbers", Random().NextLong()));
                }
                doc.Add(new NumericDocValuesField("numbersAlways", Random().NextLong()));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            Bits multi = MultiDocValues.GetDocsWithField(ir, "numbers");
            Bits single = merged.GetDocsWithField("numbers");
            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.Length(), multi.Length());
                for (int i = 0; i < numDocs; i++)
                {
                    Assert.AreEqual(single.Get(i), multi.Get(i));
                }
            }

            multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
            single = merged.GetDocsWithField("numbersAlways");
            Assert.AreEqual(single.Length(), multi.Length());
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }