Example #1
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO));
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
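
A SetUp like this is normally paired with a TearDown that releases what it acquired, in reverse order; a minimal sketch, assuming the Reader and Directory members initialized above (compare the TearDown in Example #10):

        public override void TearDown()
        {
            Reader.Dispose();     // release the reader obtained from the writer
            Directory.Dispose();  // then the directory it was opened on
            base.TearDown();
        }
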
Example #2
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
            for (int i = 0; i < Values.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(FIELD, Values[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
            IndexSearcher = NewSearcher(IndexReader);
            IndexSearcher.Similarity = new DefaultSimilarity();
        }
Example #3
        public virtual void Test()
        {
            Random       random   = new Random(Random.Next());
            LineFileDocs docs     = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            Directory    d        = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                LuceneTestCase.Random, d, analyzer);
            int numDocs = AtLeast(10);

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            List<BytesRef> terms     = new List<BytesRef>();
            TermsEnum      termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null);
            BytesRef       term;

            while ((term = termsEnum.Next()) != null)
            {
                terms.Add(BytesRef.DeepCopyOf(term));
            }
            if (Verbose)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto  = -1;
            int iters = AtLeast(200);

            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                {
                    // next
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.Next() == null;
                    upto++;
                    if (isEnd)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    BytesRef target;
                    string   exists;
                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        // likely fake term
                        if (LuceneTestCase.Random.NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[LuceneTestCase.Random.Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (Verbose)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term);
                        }
                    }
                    else
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (Verbose)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term);
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
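
The upto bookkeeping above relies on the List<T>.BinarySearch convention: a non-negative result is the index of a match, while a negative result is the bitwise complement of the insertion point, so -(upto + 1) recovers the index that SeekCeil should land on. A standalone illustration:

        // List<T>.BinarySearch returns ~insertionPoint when the target is absent.
        var sorted  = new List<int> { 10, 20, 30 };
        int found   = sorted.BinarySearch(20);  // 1: index of the matching element
        int missing = sorted.BinarySearch(25);  // ~2 == -3: target not present
        int ceil    = -(missing + 1);           // 2: index of the next larger element (30)
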
Example #4
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestNumericField()
        {
            Directory dir     = NewDirectory();
            var       w       = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone);
            var       numDocs = AtLeast(500);
            var       answers = new object[numDocs];

            NumericType[] typeAnswers = new NumericType[numDocs];
            for (int id = 0; id < numDocs; id++)
            {
                Document    doc = new Document();
                Field       nf;
                Field       sf;
                object      answer;
                NumericType typeAnswer;
                if (Random().NextBoolean())
                {
                    // float/double
                    if (Random().NextBoolean())
                    {
                        float f = Random().NextFloat();
                        answer     = Convert.ToSingle(f, CultureInfo.InvariantCulture);
                        nf         = new SingleField("nf", f, Field.Store.NO);
                        sf         = new StoredField("nf", f);
                        typeAnswer = NumericType.SINGLE;
                    }
                    else
                    {
                        double d = Random().NextDouble();
                        answer     = Convert.ToDouble(d, CultureInfo.InvariantCulture);
                        nf         = new DoubleField("nf", d, Field.Store.NO);
                        sf         = new StoredField("nf", d);
                        typeAnswer = NumericType.DOUBLE;
                    }
                }
                else
                {
                    // int/long
                    if (Random().NextBoolean())
                    {
                        int i = Random().Next();
                        answer     = Convert.ToInt32(i, CultureInfo.InvariantCulture);
                        nf         = new Int32Field("nf", i, Field.Store.NO);
                        sf         = new StoredField("nf", i);
                        typeAnswer = NumericType.INT32;
                    }
                    else
                    {
                        long l = Random().NextLong();
                        answer     = Convert.ToInt64(l, CultureInfo.InvariantCulture);
                        nf         = new Int64Field("nf", l, Field.Store.NO);
                        sf         = new StoredField("nf", l);
                        typeAnswer = NumericType.INT64;
                    }
                }
                doc.Add(nf);
                doc.Add(sf);
                answers[id]     = answer;
                typeAnswers[id] = typeAnswer;
                FieldType ft = new FieldType(Int32Field.TYPE_STORED);
                ft.NumericPrecisionStep = int.MaxValue;
                doc.Add(new Int32Field("id", id, ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            Assert.AreEqual(numDocs, r.NumDocs);

            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                AtomicReader      sub = (AtomicReader)ctx.Reader;
                FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                for (int docID = 0; docID < sub.NumDocs; docID++)
                {
                    Document doc = sub.Document(docID);
                    Field    f   = doc.GetField<Field>("nf");
                    Assert.IsTrue(f is StoredField, "got f=" + f);
#pragma warning disable 612, 618
                    Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
#pragma warning restore 612, 618
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Example #5
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestRandomStoredFields()
        {
            Directory         dir  = NewDirectory();
            Random            rand = Random();
            RandomIndexWriter w    = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
            //w.w.setNoCFSRatio(0.0);
            int docCount   = AtLeast(200);
            int fieldCount = TestUtil.NextInt(rand, 1, 5);

            IList<int?> fieldIDs = new List<int?>();

            FieldType customType = new FieldType(TextField.TYPE_STORED);

            customType.IsTokenized = false;
            Field idField = NewField("id", "", customType);

            for (int i = 0; i < fieldCount; i++)
            {
                fieldIDs.Add(i);
            }

            IDictionary<string, Document> docs = new Dictionary<string, Document>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: build index docCount=" + docCount);
            }

            FieldType customType2 = new FieldType();

            customType2.IsStored = true;
            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(idField);
                string id = "" + i;
                idField.SetStringValue(id);
                docs[id] = doc;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: add doc id=" + id);
                }

                foreach (int field in fieldIDs)
                {
                    string s;
                    if (rand.Next(4) != 3)
                    {
                        s = TestUtil.RandomUnicodeString(rand, 1000);
                        doc.Add(NewField("f" + field, s, customType2));
                    }
                    else
                    {
                        s = null;
                    }
                }
                w.AddDocument(doc);
                if (rand.Next(50) == 17)
                {
                    // mix up the binding of field name -> number every so often
                    Collections.Shuffle(fieldIDs);
                }
                if (rand.Next(5) == 3 && i > 0)
                {
                    string delID = "" + rand.Next(i);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: delete doc id=" + delID);
                    }
                    w.DeleteDocuments(new Term("id", delID));
                    docs.Remove(delID);
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
            }
            if (docs.Count > 0)
            {
                string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);

                for (int x = 0; x < 2; x++)
                {
                    IndexReader   r = w.Reader;
                    IndexSearcher s = NewSearcher(r);

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
                    }

                    int num = AtLeast(1000);
                    for (int iter = 0; iter < num; iter++)
                    {
                        string testID = idsList[rand.Next(idsList.Length)];
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: test id=" + testID);
                        }
                        TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                        Assert.AreEqual(1, hits.TotalHits);
                        Document doc    = r.Document(hits.ScoreDocs[0].Doc);
                        Document docExp = docs[testID];
                        for (int i = 0; i < fieldCount; i++)
                        {
                            assertEquals("doc " + testID + ", field f" + i + " is wrong", docExp.Get("f" + i), doc.Get("f" + i));
                        }
                    }
                    r.Dispose();
                    w.ForceMerge(1);
                }
            }
            w.Dispose();
            dir.Dispose();
        }
Example #6
        public virtual void TestEnforceDeletions()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));
            // asserts below require no unexpected merges:

            // NOTE: cannot use writer.getReader because RIW (on
            // flipping a coin) may give us a newly opened reader,
            // but we use .reopen on this reader below and expect to
            // (must) get an NRT reader:
            DirectoryReader reader = DirectoryReader.Open(writer.w, true);
            // same reason we don't wrap?
            IndexSearcher searcher = NewSearcher(reader, false);

            // add a doc, refresh the reader, and check that it's there
            Document doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
            Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

            Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

            CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.IsTrue(filter.SizeInBytes() > 0);

            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

            Query constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // make sure we get a cache hit when we reopen reader
            // that had no change to deletions

            // fake delete (deletes nothing):
            writer.DeleteDocuments(new Term("foo", "bar"));

            IndexReader oldReader = reader;
            reader = RefreshReader(reader);
            Assert.IsTrue(reader == oldReader);
            int missCount = filter.MissCount;
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // cache hit:
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;
            reader = RefreshReader(reader);

            searcher = NewSearcher(reader, false);

            missCount = filter.MissCount;
            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

            // cache hit
            Assert.AreEqual(missCount, filter.MissCount);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

            // apply deletes dynamically:
            filter = new CachingWrapperFilter(startFilter);
            writer.AddDocument(doc);
            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
            missCount = filter.MissCount;
            Assert.IsTrue(missCount > 0);
            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            writer.AddDocument(doc);

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
            Assert.IsTrue(filter.MissCount > missCount);
            missCount = filter.MissCount;

            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            // NOTE: silliness to make sure the runtime does not optimize away
            // our holding onto oldReader, which would let
            // CachingWrapperFilter's WeakHashMap drop the
            // entry:
            Assert.IsTrue(oldReader != null);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
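
RefreshReader is a private helper that is not part of this excerpt; it is essentially a wrapper around DirectoryReader.OpenIfChanged that disposes the old reader only when a changed reader comes back, which is why the test can assert reader == oldReader after the fake delete. A sketch of that shape (not necessarily the verbatim helper):

        private static DirectoryReader RefreshReader(DirectoryReader reader)
        {
            DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
            if (newReader != null)
            {
                reader.Dispose();  // a changed NRT reader was opened; drop the old one
                return newReader;
            }
            return reader;         // no changes; keep the same instance
        }
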
Example #7
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc       = new Document();
                int      numValues = Random.Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random, 2))));
                }
                iw.AddDocument(doc);
                if (Random.Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.GetReader();

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.GetReader();
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            SortedSetDocValues multi  = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual   = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List<long?> expectedList = new List<long?>();
                    long        ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Example #8
        public virtual void Test2()
        {
            Random            random   = Random;
            int               NUM_DOCS = AtLeast(100);
            Directory         dir      = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                random, dir);
            bool         allowDups = random.NextBoolean();
            ISet<string> seen      = new JCG.HashSet<string>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
            }
            int numDocs = 0;
            IList<BytesRef> docValues = new List<BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                string s;
                if (random.NextBoolean())
                {
                    s = TestUtil.RandomSimpleString(random);
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random);
                }
                BytesRef br = new BytesRef(s);

                if (!allowDups)
                {
                    if (seen.Contains(s))
                    {
                        continue;
                    }
                    seen.Add(s);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  " + numDocs + ": s=" + s);
                }

                Document doc = new Document();
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
                docValues.Add(br);
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.GetReader().Dispose();
                }
            }

            writer.ForceMerge(1);
            DirectoryReader r = writer.GetReader();

            writer.Dispose();

            AtomicReader sr = GetOnlySegmentReader(r);

            long END_TIME = Environment.TickCount + (TEST_NIGHTLY ? 30 : 1) * 1000; // run 30s nightly, else 1s

            int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10);

            ThreadJob[] threads = new ThreadJob[NUM_THREADS];
            for (int thread = 0; thread < NUM_THREADS; thread++)
            {
                threads[thread] = new ThreadAnonymousInnerClassHelper2(random, docValues, sr, END_TIME);
                threads[thread].Start();
            }

            foreach (ThreadJob thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
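
ThreadAnonymousInnerClassHelper2 is not included in this snippet; judging from its constructor arguments, it is a ThreadJob subclass that hammers the single merged segment with random doc-values lookups until END_TIME. A rough sketch of such a worker; the class name, fields, and body here are assumptions, not the original:

        private class RandomLookupThread : ThreadJob
        {
            private readonly Random random;
            private readonly IList<BytesRef> docValues;
            private readonly AtomicReader reader;
            private readonly long endTime;

            public RandomLookupThread(Random random, IList<BytesRef> docValues, AtomicReader reader, long endTime)
            {
                this.random    = random;
                this.docValues = docValues;
                this.reader    = reader;
                this.endTime   = endTime;
            }

            public override void Run()
            {
                SortedDocValues stringDv = reader.GetSortedDocValues("stringdv");
                NumericDocValues idDv    = reader.GetNumericDocValues("id");
                BytesRef scratch         = new BytesRef();
                while (Environment.TickCount < endTime)
                {
                    int docId = random.Next(reader.MaxDoc);
                    stringDv.Get(docId, scratch);  // resolve this doc's sorted value
                    // the stored value must match what was indexed for this doc's id
                    Assert.AreEqual(docValues[(int)idDv.Get(docId)], scratch);
                }
            }
        }
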
Example #9
        public static void BeforeClass()
        {
            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();

                AddSome(doc, AlwaysTerms);

                if (Random().Next(100) < 90)
                {
                    AddSome(doc, CommonTerms);
                }
                if (Random().Next(100) < 50)
                {
                    AddSome(doc, MediumTerms);
                }
                if (Random().Next(100) < 10)
                {
                    AddSome(doc, RareTerms);
                }
                iw.AddDocument(doc);
            }
            iw.ForceMerge(1);
            iw.Dispose();
            r = DirectoryReader.Open(Dir);
            atomicReader = GetOnlySegmentReader(r);
            Searcher = new IndexSearcher(atomicReader);
            Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper();
        }
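
AddSome is another helper that does not appear in this excerpt; it adds terms from the given array to the document (the real helper may well add only a random subset of them). A plausible sketch, labeled as an assumption:

        private static void AddSome(Document doc, string[] values)
        {
            // add a random non-empty prefix of the given terms to the doc
            int howMany = TestUtil.NextInt(Random(), 1, values.Length);
            for (int i = 0; i < howMany; i++)
            {
                doc.Add(NewStringField("field", values[i], Field.Store.NO));
            }
        }
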
Example #10
        public override void TearDown()
        {
            Writer.Dispose();
            Directory.Dispose();
            base.TearDown();
        }
Example #11
        public override void SetUp()
        {
            base.SetUp();

            Index = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Index, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(Sim).SetMergePolicy(NewLogMergePolicy()));

            // hed is the most important field, dek is secondary

            // d1 is an "ok" match for: albino elephant
            {
                Document d1 = new Document();
                d1.Add(NewField("id", "d1", NonAnalyzedType)); // Field.Keyword("id",
                // "d1"));
                d1.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                d1.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant"));
                writer.AddDocument(d1);
            }

            // d2 is a "good" match for: albino elephant
            {
                Document d2 = new Document();
                d2.Add(NewField("id", "d2", NonAnalyzedType)); // Field.Keyword("id",
                // "d2"));
                d2.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                d2.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek",
                // "albino"));
                d2.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant"));
                writer.AddDocument(d2);
            }

            // d3 is a "better" match for: albino elephant
            {
                Document d3 = new Document();
                d3.Add(NewField("id", "d3", NonAnalyzedType)); // Field.Keyword("id",
                // "d3"));
                d3.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed",
                // "albino"));
                d3.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                writer.AddDocument(d3);
            }

            // d4 is the "best" match for: albino elephant
            {
                Document d4 = new Document();
                d4.Add(NewField("id", "d4", NonAnalyzedType)); // Field.Keyword("id",
                // "d4"));
                d4.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed",
                // "albino"));
                d4.Add(NewField("hed", "elephant", NonAnalyzedType)); // Field.Text("hed", "elephant"));
                d4.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek",
                // "albino"));
                writer.AddDocument(d4);
            }

            r = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
            s = NewSearcher(r);
            s.Similarity = Sim;
        }
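
This is the classic "albino elephant" fixture for disjunction-max scoring: each document should be ranked by its best single-field match rather than by the sum of its matches, so d4 (both query terms in the important "hed" field) comes out on top. A typical query against the s searcher above, as a sketch:

            DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f); // 0.0 tie-breaker: pure max
            q.Add(new TermQuery(new Term("hed", "albino")));
            q.Add(new TermQuery(new Term("hed", "elephant")));
            TopDocs hits = s.Search(q, 10);
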
Example #12
        public virtual void TestCrazySpans()
        {
            // The problem: "normal" Lucene queries create scorers, returning null if terms don't exist,
            // which means they never score a term that does not exist.
            // With spans, however, there is only one scorer for the whole hierarchy:
            // inner queries are not real queries, their boosts are ignored, etc.
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            doc.Add(NewField("foo", "bar", ft));
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar"));
                SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz"));
                Query query = new SpanOrQuery(s1, s2);
                TopDocs td = @is.Search(query, 10);
                Assert.AreEqual(1, td.TotalHits);
                float score = td.ScoreDocs[0].Score;
                Assert.IsTrue(score >= 0.0f);
                Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim);
            }
            ir.Dispose();
            dir.Dispose();
        }
Example #13
        public void TestNumericField()
        {
            Directory dir     = NewDirectory();
            var       w       = new RandomIndexWriter(Random(), dir);
            var       numDocs = AtLeast(500);
            var       answers = new object[numDocs];

            FieldType.NumericType[] typeAnswers = new FieldType.NumericType[numDocs];
            for (int id = 0; id < numDocs; id++)
            {
                Document doc = new Document();
                Field    nf;
                Field    sf;
                object   answer;
                FieldType.NumericType typeAnswer;
                if (Random().NextBoolean())
                {
                    // float/double
                    if (Random().NextBoolean())
                    {
                        float f = Random().NextFloat();
                        answer     = Convert.ToSingle(f);
                        nf         = new FloatField("nf", f, Field.Store.NO);
                        sf         = new StoredField("nf", f);
                        typeAnswer = FieldType.NumericType.FLOAT;
                    }
                    else
                    {
                        double d = Random().NextDouble();
                        answer     = Convert.ToDouble(d);
                        nf         = new DoubleField("nf", d, Field.Store.NO);
                        sf         = new StoredField("nf", d);
                        typeAnswer = FieldType.NumericType.DOUBLE;
                    }
                }
                else
                {
                    // int/long
                    if (Random().NextBoolean())
                    {
                        int i = Random().Next();
                        answer     = Convert.ToInt32(i);
                        nf         = new IntField("nf", i, Field.Store.NO);
                        sf         = new StoredField("nf", i);
                        typeAnswer = FieldType.NumericType.INT;
                    }
                    else
                    {
                        long l = Random().NextLong();
                        answer     = Convert.ToInt64(l);
                        nf         = new LongField("nf", l, Field.Store.NO);
                        sf         = new StoredField("nf", l);
                        typeAnswer = FieldType.NumericType.LONG;
                    }
                }
                doc.Add(nf);
                doc.Add(sf);
                answers[id]     = answer;
                typeAnswers[id] = typeAnswer;
                FieldType ft = new FieldType(IntField.TYPE_STORED);
                ft.NumericPrecisionStep = int.MaxValue;
                doc.Add(new IntField("id", id, ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            Assert.AreEqual(numDocs, r.NumDocs);

            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                AtomicReader    sub = (AtomicReader)ctx.Reader;
                FieldCache.Ints ids = FieldCache.DEFAULT.GetInts(sub, "id", false);
                for (int docID = 0; docID < sub.NumDocs; docID++)
                {
                    Document doc = sub.Document(docID);
                    Field    f   = (Field)doc.GetField("nf");
                    Assert.IsTrue(f is StoredField, "got f=" + f);
                    Assert.AreEqual(answers[ids.Get(docID)], f.NumericValue);
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Example #14
        public virtual void TestOmitTFAndNorms()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.OmitNorms = true;
            ft.Freeze();
            Field f = NewField("foo", "bar", ft);
            doc.Add(f);
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                BooleanQuery query = new BooleanQuery(true);
                query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
                Assert.AreEqual(1, @is.Search(query, 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
Example #15
        public virtual void TestEmptyTerm()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            doc.Add(NewTextField("foo", "bar", Field.Store.NO));
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                BooleanQuery query = new BooleanQuery(true);
                query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
                query.Add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
                Assert.AreEqual(1, @is.Search(query, 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
Example #16
        public virtual void TestEmptyIndex()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                Assert.AreEqual(0, @is.Search(new TermQuery(new Term("foo", "bar")), 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
Example #17
        public virtual void TestIndexing()
        {
            DirectoryInfo        tmpDir = CreateTempDir("TestNeverDelete");
            BaseDirectoryWrapper d      = NewFSDirectory(tmpDir);

            // We want to "see" files removed if Lucene removed
            // them. This is still worth running on Windows since
            // the IR still opens and closes some files.
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
            }
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));

            w.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30));

            w.Commit();
            ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)];
            long          stopTime     = Environment.TickCount + AtLeast(1000);

            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x]      = new ThreadAnonymousInnerClassHelper(w, stopTime, NewStringField, NewTextField);
                indexThreads[x].Name = "Thread " + x;
                indexThreads[x].Start();
            }

            HashSet<string> allFiles = new HashSet<string>();

            DirectoryReader r = DirectoryReader.Open(d);

            while (Environment.TickCount < stopTime)
            {
                IndexCommit ic = r.IndexCommit;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: check files: " + ic.FileNames);
                }
                allFiles.AddAll(ic.FileNames);
                // Make sure no old files were removed
                foreach (string fileName in allFiles)
                {
                    Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
                }
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Thread.Sleep(1);
            }
            r.Dispose();

            foreach (ThreadClass t in indexThreads)
            {
                t.Join();
            }
            w.Dispose();
            d.Dispose();

            System.IO.Directory.Delete(tmpDir.FullName, true);
        }
Example #18
        public virtual void TestIndexing()
        {
            DirectoryInfo        tmpDir = CreateTempDir("TestNeverDelete");
            BaseDirectoryWrapper d      = NewFSDirectory(tmpDir);

            // We want to "see" files removed if Lucene removed
            // them. This is still worth running on Windows since
            // the IR still opens and closes some files.
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
            }
            RandomIndexWriter w = new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));

            w.IndexWriter.Config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 5, 30));

            w.Commit();
            ThreadJob[] indexThreads = new ThreadJob[Random.Next(4)];
            long        stopTime     = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + AtLeast(1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x]      = new ThreadAnonymousClass(w, stopTime, NewStringField, NewTextField);
                indexThreads[x].Name = "Thread " + x;
                indexThreads[x].Start();
            }

            ISet<string> allFiles = new JCG.HashSet<string>();

            DirectoryReader r = DirectoryReader.Open(d);

            while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            {
                IndexCommit ic = r.IndexCommit;
                if (Verbose)
                {
                    Console.WriteLine("TEST: check files: " + ic.FileNames);
                }
                allFiles.UnionWith(ic.FileNames);
                // Make sure no old files were removed
                foreach (string fileName in allFiles)
                {
                    Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
                }
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Thread.Sleep(1);
            }
            r.Dispose();

            foreach (ThreadJob t in indexThreads)
            {
                t.Join();
            }
            w.Dispose();
            d.Dispose();

            System.IO.Directory.Delete(tmpDir.FullName, true);
        }
Example #19
        public virtual void TestRandomWithPrefix()
        {
            Directory dir = NewDirectory();

            HashSet<string> prefixes = new HashSet<string>();
            int numPrefix            = TestUtil.NextInt(Random(), 2, 7);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: use " + numPrefix + " prefixes");
            }
            while (prefixes.Count < numPrefix)
            {
                prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random()));
                //prefixes.Add(TestUtil.RandomSimpleString(random));
            }
            string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/);

            int NUM_TERMS           = AtLeast(20);
            HashSet<BytesRef> terms = new HashSet<BytesRef>();

            while (terms.Count < NUM_TERMS)
            {
                string s = prefixesArray[Random().Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random());
                //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random);
                if (s.Length > 0)
                {
                    terms.Add(new BytesRef(s));
                }
            }
            BytesRef[] termsArray = terms.ToArray();
            Array.Sort(termsArray);

            int NUM_DOCS = AtLeast(100);

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Sometimes swap in codec that impls ord():
            if (Random().Next(10) == 7)
            {
                Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
                conf.SetCodec(codec);
            }

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf);

            int[][]       idToOrds      = new int[NUM_DOCS][];
            HashSet<int?> ordsForDocSet = new HashSet<int?>();

            for (int id = 0; id < NUM_DOCS; id++)
            {
                Document doc = new Document();

                doc.Add(new Int32Field("id", id, Field.Store.NO));

                int termCount = TestUtil.NextInt(Random(), 0, 20 * RANDOM_MULTIPLIER);
                while (ordsForDocSet.Count < termCount)
                {
                    ordsForDocSet.Add(Random().Next(termsArray.Length));
                }
                int[] ordsForDoc = new int[termCount];
                int   upto       = 0;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: doc id=" + id);
                }
                foreach (int ord in ordsForDocSet)
                {
                    ordsForDoc[upto++] = ord;
                    Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  f=" + termsArray[ord].Utf8ToString());
                    }
                    doc.Add(field);
                }
                ordsForDocSet.Clear();
                Array.Sort(ordsForDoc);
                idToOrds[id] = ordsForDoc;
                w.AddDocument(doc);
            }

            DirectoryReader r = w.Reader;

            w.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + r);
            }

            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);

            foreach (string prefix in prefixesArray)
            {
                BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

                int[][] idToOrdsPrefix = new int[NUM_DOCS][];
                for (int id = 0; id < NUM_DOCS; id++)
                {
                    int[]       docOrds = idToOrds[id];
                    IList<int?> newOrds = new List<int?>();
                    foreach (int ord in docOrds)
                    {
                        if (StringHelper.StartsWith(termsArray[ord], prefixRef))
                        {
                            newOrds.Add(ord);
                        }
                    }
                    int[] newOrdsArray = new int[newOrds.Count];
                    int   upto         = 0;
                    foreach (int ord in newOrds)
                    {
                        newOrdsArray[upto++] = ord;
                    }
                    idToOrdsPrefix[id] = newOrdsArray;
                }

                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: sub=" + ctx.Reader);
                    }
                    Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef);
                }

                // Also test top-level reader: its enum does not support
                // ord, so this forces the OrdWrapper to run:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: top reader");
                }
                Verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
            }

            FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

            r.Dispose();
            dir.Dispose();
        }
Example #20
        public virtual void TestIllegalIndexableField()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.StoreTermVectors        = true;
            ft.StoreTermVectorPayloads = true;
            Document doc = new Document();

            doc.Add(new Field("field", "value", ft));
            try
            {
                w.AddDocument(doc);
                Assert.Fail("did not hit exception");
            }
            catch (ArgumentException iae)
            {
                // Expected
                Assert.AreEqual("cannot index term vector payloads without term vector positions (field=\"field\")", iae.Message);
            }

            ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.StoreTermVectors       = false;
            ft.StoreTermVectorOffsets = true;
            doc = new Document();
            doc.Add(new Field("field", "value", ft));
            try
            {
                w.AddDocument(doc);
                Assert.Fail("did not hit exception");
            }
            catch (ArgumentException iae)
            {
                // Expected
                Assert.AreEqual("cannot index term vector offsets when term vectors are not indexed (field=\"field\")", iae.Message);
            }

            ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.StoreTermVectors         = false;
            ft.StoreTermVectorPositions = true;
            doc = new Document();
            doc.Add(new Field("field", "value", ft));
            try
            {
                w.AddDocument(doc);
                Assert.Fail("did not hit exception");
            }
            catch (ArgumentException iae)
            {
                // Expected
                Assert.AreEqual("cannot index term vector positions when term vectors are not indexed (field=\"field\")", iae.Message);
            }

            ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.StoreTermVectors        = false;
            ft.StoreTermVectorPayloads = true;
            doc = new Document();
            doc.Add(new Field("field", "value", ft));
            try
            {
                w.AddDocument(doc);
                Assert.Fail("did not hit exception");
            }
            catch (ArgumentException iae)
            {
                // Expected
                Assert.AreEqual("cannot index term vector payloads when term vectors are not indexed (field=\"field\")", iae.Message);
            }

            w.Dispose();

            dir.Dispose();
        }
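
As the expected messages spell out, offsets, positions, and payloads are only legal when term vectors themselves are enabled, and payloads additionally require positions. For contrast, a minimal valid configuration:

            FieldType ok = new FieldType(TextField.TYPE_NOT_STORED);
            ok.StoreTermVectors         = true;  // vectors must be enabled first
            ok.StoreTermVectorPositions = true;  // payloads require positions
            ok.StoreTermVectorPayloads  = true;
            ok.Freeze();
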
Example #21
        public static void BeforeClass()
        {
            NoDocs = AtLeast(4096);
            Distance = (1L << 60) / NoDocs;
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000)).SetMergePolicy(NewLogMergePolicy()));

            FieldType storedLong = new FieldType(LongField.TYPE_NOT_STORED);
            storedLong.Stored = true;
            storedLong.Freeze();

            FieldType storedLong8 = new FieldType(storedLong);
            storedLong8.NumericPrecisionStep = 8;

            FieldType storedLong4 = new FieldType(storedLong);
            storedLong4.NumericPrecisionStep = 4;

            FieldType storedLong6 = new FieldType(storedLong);
            storedLong6.NumericPrecisionStep = 6;

            FieldType storedLong2 = new FieldType(storedLong);
            storedLong2.NumericPrecisionStep = 2;

            FieldType storedLongNone = new FieldType(storedLong);
            storedLongNone.NumericPrecisionStep = int.MaxValue;

            FieldType unstoredLong = LongField.TYPE_NOT_STORED;

            FieldType unstoredLong8 = new FieldType(unstoredLong);
            unstoredLong8.NumericPrecisionStep = 8;

            FieldType unstoredLong6 = new FieldType(unstoredLong);
            unstoredLong6.NumericPrecisionStep = 6;

            FieldType unstoredLong4 = new FieldType(unstoredLong);
            unstoredLong4.NumericPrecisionStep = 4;

            FieldType unstoredLong2 = new FieldType(unstoredLong);
            unstoredLong2.NumericPrecisionStep = 2;

            LongField field8      = new LongField("field8", 0L, storedLong8),
                      field6      = new LongField("field6", 0L, storedLong6),
                      field4      = new LongField("field4", 0L, storedLong4),
                      field2      = new LongField("field2", 0L, storedLong2),
                      fieldNoTrie = new LongField("field" + int.MaxValue, 0L, storedLongNone),
                      ascfield8   = new LongField("ascfield8", 0L, unstoredLong8),
                      ascfield6   = new LongField("ascfield6", 0L, unstoredLong6),
                      ascfield4   = new LongField("ascfield4", 0L, unstoredLong4),
                      ascfield2   = new LongField("ascfield2", 0L, unstoredLong2);

            Document doc = new Document();
            // add fields, that have a distance to test general functionality
            doc.Add(field8);
            doc.Add(field6);
            doc.Add(field4);
            doc.Add(field2);
            doc.Add(fieldNoTrie);
            // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
            doc.Add(ascfield8);
            doc.Add(ascfield6);
            doc.Add(ascfield4);
            doc.Add(ascfield2);

            // Add a series of noDocs docs with increasing long values, by updating the fields
            for (int l = 0; l < NoDocs; l++)
            {
                long val = Distance * l + StartOffset;
                field8.LongValue = val;
                field6.LongValue = val;
                field4.LongValue = val;
                field2.LongValue = val;
                fieldNoTrie.LongValue = val;

                val = l - (NoDocs / 2);
                ascfield8.LongValue = val;
                ascfield6.LongValue = val;
                ascfield4.LongValue = val;
                ascfield2.LongValue = val;
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
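
A fixture like this backs NumericRangeQuery tests across the different precision steps. A typical query over one of the trie fields might look like the following sketch; lower and upper are arbitrary bounds chosen here for illustration:

            long lower = StartOffset;
            long upper = StartOffset + 1000 * Distance;
            // inclusive range over the precisionStep=8 field built above
            Query q = NumericRangeQuery.NewLongRange("field8", 8, lower, upper, true, true);
            TopDocs hits = Searcher.Search(q, NoDocs);
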
Example #22
        public virtual void DoTestLongPostingsNoPositions(FieldInfo.IndexOptions options)
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (i.e. the same seed will point to the same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

                /*
                 * for(int idx=0;idx<s1.Length();idx++) {
                 * System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                 * }
                 * for(int idx=0;idx<s2.Length();idx++) {
                 * System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                 * }
                 */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random().NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader r;

            if (true) // always build a fresh index; the else branch is handy when debugging against an existing one
            {
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy());
                iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
                iwc.SetMaxBufferedDocs(-1);
                RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);

                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                ft.IndexOptions = options;
                for (int idx = 0; idx < NUM_DOCS; idx++)
                {
                    Document doc   = new Document();
                    string   s     = isS1.Get(idx) ? s1 : s2;
                    Field    f     = NewField("field", s, ft);
                    int      count = TestUtil.NextInt(Random(), 1, 4);
                    for (int ct = 0; ct < count; ct++)
                    {
                        doc.Add(f);
                    }
                    riw.AddDocument(doc);
                }

                r = riw.Reader;
                riw.Dispose();
            }
            else
            {
                r = DirectoryReader.Open(dir);
            }

            /*
             * if (VERBOSE) {
             * System.out.println("TEST: terms");
             * TermEnum termEnum = r.Terms();
             * while(termEnum.Next()) {
             *  System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
             *  Assert.IsTrue(termEnum.DocFreq() > 0);
             *  System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
             *  System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
             *  final String s = termEnum.Term().Text();
             *  for(int idx=0;idx<s.Length();idx++) {
             *    System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
             *  }
             * }
             * }
             */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);

            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool   doS1;
                if (Random().NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
                }

                DocsEnum docs;
                DocsEnum postings;

                if (options == FieldInfo.IndexOptions.DOCS_ONLY)
                {
                    docs     = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_NONE);
                    postings = null;
                }
                else
                {
                    docs = postings = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_FREQS);
                    Debug.Assert(postings != null);
                }
                Debug.Assert(docs != null);

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random().Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = docs.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random().Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq();
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random().Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = docs.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random().Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq();
                            Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
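GetRandomTerm is referenced above but not shown. A simplified, hedged stand-in could look like this (the real helper also guards against analyzer edge cases, e.g. terms that would split surrogate pairs):

        private string GetRandomTerm(string other)
        {
            // draw random simple terms until we get a non-empty one that
            // differs from 'other' (which may be null)
            while (true)
            {
                string s = TestUtil.RandomSimpleString(Random());
                if (s.Length > 0 && !s.Equals(other, StringComparison.Ordinal))
                {
                    return s;
                }
            }
        }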
        public virtual void TestNullDocIdSetIterator()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            AtomicReaderContext context = (AtomicReaderContext)reader.Context;

            Filter filter = new FilterAnonymousInnerClassHelper2(this, context);
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // the caching filter should return the empty set constant
            Assert.IsNull(cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs));

            reader.Dispose();
            dir.Dispose();
        }
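FilterAnonymousInnerClassHelper2 is defined elsewhere; a hedged sketch of the filter it stands in for (names hypothetical, signatures per the 4.8 port):

        private sealed class NullIteratorFilter : Filter
        {
            public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
            {
                // a DocIdSet whose iterator is null means "no matching docs";
                // CachingWrapperFilter is expected to cache that as its
                // empty-set constant and hand back null above
                return new NullIteratorDocIdSet();
            }

            private sealed class NullIteratorDocIdSet : DocIdSet
            {
                public override DocIdSetIterator GetIterator()
                {
                    return null;
                }
            }
        }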
Example #24
        public virtual void Test2()
        {
            Random            random    = Random;
            int               NUM_DOCS  = AtLeast(100);
            Directory         dir       = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(random, dir);
            bool              allowDups = random.NextBoolean();
            ISet <string>     seen      = new JCG.HashSet <string>();

            if (Verbose)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
            }
            int numDocs = 0;
            IList <BytesRef> docValues = new JCG.List <BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                string s;
                if (random.NextBoolean())
                {
                    s = TestUtil.RandomSimpleString(random);
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random);
                }
                BytesRef br = new BytesRef(s);

                if (!allowDups)
                {
                    if (seen.Contains(s))
                    {
                        continue;
                    }
                    seen.Add(s);
                }

                if (Verbose)
                {
                    Console.WriteLine("  " + numDocs + ": s=" + s);
                }

                Document doc = new Document();
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
                docValues.Add(br);
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.GetReader().Dispose();
                }
            }

            writer.ForceMerge(1);
            DirectoryReader r = writer.GetReader();

            writer.Dispose();

            AtomicReader sr = GetOnlySegmentReader(r);

            long END_TIME = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (TestNightly ? 30 : 1); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10);

            ThreadJob[] threads = new ThreadJob[NUM_THREADS];
            for (int thread = 0; thread < NUM_THREADS; thread++)
            {
                threads[thread] = new ThreadAnonymousClass2(random, docValues, sr, END_TIME);
                threads[thread].Start();
            }

            foreach (ThreadJob thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
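ThreadAnonymousClass2 is defined elsewhere; a hedged sketch of the loop each thread likely runs (modeled on the upstream TestDocValuesWithThreads): until END_TIME, read random docIDs from the sorted dv and compare against the recorded values, using the Random instance handed to the thread:

            SortedDocValues stringDV = sr.GetSortedDocValues("stringdv");
            NumericDocValues docIDToID = sr.GetNumericDocValues("id");
            BytesRef scratch = new BytesRef();
            while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < END_TIME)
            {
                int docID = random.Next(sr.MaxDoc);
                stringDV.Get(docID, scratch);
                Assert.AreEqual(docValues[(int)docIDToID.Get(docID)], scratch);
            }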
Example #25
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            Collections.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType(prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = IndexOptions.DOCS_AND_FREQS; // we don't actually need positions
                fieldType.StoreTermVectors = true;                        // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);
            TermsEnum termsEnum = terms.GetIterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
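ThreadAnonymousInnerClassHelper is not shown; a hedged sketch of its Run body (modeled on the upstream TestBagOfPostings thread): drain terms off the shared queue, never repeating a term within one document, then index the reused document:

            public override void Run()
            {
                startingGun.Wait();
                while (!postings.IsEmpty)
                {
                    StringBuilder text = new StringBuilder();
                    ISet<string> visited = new HashSet<string>();
                    for (int i = 0; i < maxTermsPerDoc; i++)
                    {
                        string token;
                        if (!postings.TryDequeue(out token))
                        {
                            break;
                        }
                        if (visited.Contains(token))
                        {
                            // a term may appear at most once per doc; push it
                            // back for some later document
                            postings.Enqueue(token);
                            break;
                        }
                        text.Append(' ').Append(token);
                        visited.Add(token);
                    }
                    field.SetStringValue(text.ToString());
                    iw.AddDocument(document);
                }
            }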
Example #26
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random());

            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();
            }

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;

            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset;
                        Debug.Assert(start >= 0);
                        int end = dp.EndOffset;
                        Debug.Assert(end >= 0 && end >= start);
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.GetPayload());
                            BytesRef payload = dp.GetPayload();
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                        } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq;
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset;
                    Debug.Assert(start >= 0);
                    int end = dp.EndOffset;
                    Debug.Assert(end >= 0 && end >= start);
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.GetPayload());
                        BytesRef payload = dp.GetPayload();
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                    } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #27
        public virtual void TestDocsWithField()
        {
            AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField);
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                if (Random.Next(4) >= 0) // note: always true, so every doc gets "numbers" here, just like "numbersAlways"
                {
                    doc.Add(new NumericDocValuesField("numbers", Random.NextInt64()));
                }
                doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64()));
                iw.AddDocument(doc);
                if (Random.Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.GetReader();

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.GetReader();
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            IBits multi  = MultiDocValues.GetDocsWithField(ir, "numbers");
            IBits single = merged.GetDocsWithField("numbers");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.Length, multi.Length);
                for (int i = 0; i < numDocs; i++)
                {
                    Assert.AreEqual(single.Get(i), multi.Get(i));
                }
            }

            multi  = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
            single = merged.GetDocsWithField("numbersAlways");
            Assert.AreEqual(single.Length, multi.Length);
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Example #28
        public virtual void TestRandom()
        {
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();
            }

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                {
                    string text;
                    if (Random().NextBoolean())
                    {
                        text = "a";
                    }
                    else if (Random().NextBoolean())
                    {
                        text = "b";
                    }
                    else if (Random().NextBoolean())
                    {
                        text = "c";
                    }
                    else
                    {
                        text = "d";
                    }

                    int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
                    if (tokenCount == 0 && posIncr == 0)
                    {
                        posIncr = 1;
                    }
                    int offIncr     = Random().NextBoolean() ? 0 : Random().Next(5);
                    int tokenOffset = Random().Next(5);

                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    if (!actualTokens.ContainsKey(text))
                    {
                        actualTokens[text] = new Dictionary <int?, IList <Token> >();
                    }
                    IDictionary <int?, IList <Token> > postingsByDoc = actualTokens[text];
                    if (!postingsByDoc.ContainsKey(docCount))
                    {
                        postingsByDoc[docCount] = new List <Token>();
                    }
                    postingsByDoc[docCount].Add(token);
                    tokens.Add(token);
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                }
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.GetTerms("content").GetIterator(null);
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                FieldCache.Int32s    docIDToID                  = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                foreach (string term in terms)
                {
                    //System.out.println("  term=" + term);
                    if (termsEnum.SeekExact(new BytesRef(term)))
                    {
                        docs = termsEnum.Docs(null, docs);
                        Assert.IsNotNull(docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docs.Freq);
                        }

                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsFlags.PAYLOADS);
                        Assert.IsNotNull(docsAndPositions);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());
                            }
                        }

                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        Assert.IsNotNull(docsAndPositionsAndOffsets);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset);
                                Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset);
                            }
                        }
                    }
                }
                // TODO: test advance:
            }
            r.Dispose();
            dir.Dispose();
        }
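MakeToken is referenced above but not shown; a sketch matching the upstream helper, which stuffs an explicit position increment and offsets into a Token:

        private static Token MakeToken(string text, int posIncr, int startOffset, int endOffset)
        {
            Token t = new Token();
            t.Append(text);
            t.PositionIncrement = posIncr;
            t.SetOffset(startOffset, endOffset);
            return t;
        }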
Example #29
        // [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestBigDocuments()
        {
            // "big" as "much bigger than the chunk size"
            // for this test we force a FS dir
            // we can't just use NewFSDirectory, because this test doesn't really index anything,
            // so if we get NRTCachingDir+SimpleText we'd make massive stored fields and OOM (LUCENE-4484)
            Directory         dir    = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            Document emptyDoc = new Document(); // no fields other than id
            Document bigDoc1  = new Document(); // lot of small fields
            Document bigDoc2  = new Document(); // 1 very big field

            Field idField = new StringField("id", "", Field.Store.NO);

            emptyDoc.Add(idField);
            bigDoc1.Add(idField);
            bigDoc2.Add(idField);

            FieldType onlyStored = new FieldType(StringField.TYPE_STORED);

            onlyStored.IsIndexed = false;

            Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
            int   numFields  = RandomInts.NextIntBetween(Random(), 500000, 1000000);

            for (int i = 0; i < numFields; ++i)
            {
                bigDoc1.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);

            bigDoc2.Add(bigField);

            int numDocs = AtLeast(5);

            Document[] docs = new Document[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
            }
            for (int i = 0; i < numDocs; ++i)
            {
                idField.SetStringValue("" + i);
                iw.AddDocument(docs[i]);
                if (Random().Next(numDocs) == 0)
                {
                    iw.Commit();
                }
            }
            iw.Commit();
            iw.ForceMerge(1); // look at what happens when big docs are merged
            DirectoryReader rd       = DirectoryReader.Open(dir);
            IndexSearcher   searcher = new IndexSearcher(rd);

            for (int i = 0; i < numDocs; ++i)
            {
                Query   query   = new TermQuery(new Term("id", "" + i));
                TopDocs topDocs = searcher.Search(query, 1);
                Assert.AreEqual(1, topDocs.TotalHits, "" + i);
                Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
                Assert.IsNotNull(doc);
                IIndexableField[] fieldValues = doc.GetFields("fld");
                Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
                if (fieldValues.Length > 0)
                {
                    Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
                }
            }
            rd.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
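RandomByteArray is referenced above but not shown; a hedged sketch: 'length' random bytes, each drawn from [0, max):

        private byte[] RandomByteArray(int length, int max)
        {
            byte[] result = new byte[length];
            for (int i = 0; i < length; ++i)
            {
                result[i] = (byte)Random().Next(max);
            }
            return result;
        }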
Example #30
        public virtual void Test()
        {
            int NUM_DOCS = AtLeast(1000);

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = null;

            int docsLeftInThisSegment = 0;

            int docUpto = 0;

            while (docUpto < NUM_DOCS)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: " + docUpto + " of " + NUM_DOCS);
                }
                if (docsLeftInThisSegment == 0)
                {
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
                    if (Random.NextBoolean())
                    {
                        // Pin the codec for some segments so that segments
                        // written with different configs get merged together.
                        // (The upstream Java test aggressively mixed in
                        // SimpleText here, since it has different impls for
                        // all codec formats; this port pins Lucene46 instead.)
                        iwc.SetCodec(Codec.ForName("Lucene46"));
                    }
                    if (w != null)
                    {
                        w.Dispose();
                    }
                    w = new RandomIndexWriter(Random, dir, iwc);
                    docsLeftInThisSegment = TestUtil.NextInt32(Random, 10, 100);
                }
                Document doc = new Document();
                doc.Add(NewStringField("id", Convert.ToString(docUpto), Field.Store.YES));
                w.AddDocument(doc);
                docUpto++;
                docsLeftInThisSegment--;
            }

            if (Verbose)
            {
                Console.WriteLine("\nTEST: now delete...");
            }

            // Random delete half the docs:
            ISet <int?> deleted = new JCG.HashSet <int?>();

            while (deleted.Count < NUM_DOCS / 2)
            {
                int? toDelete = Random.Next(NUM_DOCS);
                if (!deleted.Contains(toDelete))
                {
                    deleted.Add(toDelete);
                    w.DeleteDocuments(new Term("id", Convert.ToString(toDelete)));
                    if (Random.Next(17) == 6)
                    {
                        IndexReader r = w.GetReader();
                        Assert.AreEqual(NUM_DOCS - deleted.Count, r.NumDocs);
                        r.Dispose();
                    }
                }
            }

            w.Dispose();
            dir.Dispose();
        }
Example #31
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            ISet <string>                terms        = new JCG.HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]      termsArray = new BytesRef[terms.Count];
            ISet <BytesRef> termsSet   = new JCG.HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (Verbose)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RandomMultiplier; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                ISet <string>            acceptTerms       = new JCG.HashSet <string>();
                JCG.SortedSet <BytesRef> sortedAcceptTerms = new JCG.SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]      acceptTermsArray = new BytesRef[acceptTerms.Count];
                ISet <BytesRef> acceptTermsSet   = new JCG.HashSet <BytesRef>();
                int             upto             = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (Verbose)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
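AddDoc is referenced above but not shown; a hedged sketch (the field name "f" matches the Intersect call above): flush the pending terms into one document and record which id each term landed in:

        private void AddDoc(RandomIndexWriter w, ICollection<string> terms, IDictionary<BytesRef, int?> termToID, int id)
        {
            Document doc = new Document();
            doc.Add(new Int32Field("id", id, Field.Store.NO));
            foreach (string s2 in terms)
            {
                doc.Add(NewStringField("f", s2, Field.Store.NO));
                termToID[new BytesRef(s2)] = id;
            }
            w.AddDocument(doc);
            terms.Clear();
        }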
        public virtual void TestStressAdvance_Mem()
        {
            for (int iter = 0; iter < 3; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir);
                ISet <int>         aDocs = new JCG.HashSet <int>();
                Documents.Document doc   = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field idField = NewStringField("id", "", Field.Store.YES);
                doc.Add(idField);
                int num = AtLeast(4097);
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: numDocs=" + num);
                }
                for (int id = 0; id < num; id++)
                {
                    if (Random.Next(4) == 3)
                    {
                        f.SetStringValue("a");
                        aDocs.Add(id);
                    }
                    else
                    {
                        f.SetStringValue("b");
                    }
                    idField.SetStringValue("" + id);
                    w.AddDocument(doc);
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: doc upto " + id);
                    }
                }

                w.ForceMerge(1);

                IList <int> aDocIDs = new List <int>();
                IList <int> bDocIDs = new List <int>();

                DirectoryReader r         = w.GetReader();
                int[]           idToDocID = new int[r.MaxDoc];
                for (int docID = 0; docID < idToDocID.Length; docID++)
                {
                    int id = Convert.ToInt32(r.Document(docID).Get("id"));
                    if (aDocs.Contains(id))
                    {
                        aDocIDs.Add(docID);
                    }
                    else
                    {
                        bDocIDs.Add(docID);
                    }
                }
                TermsEnum te = GetOnlySegmentReader(r).Fields.GetTerms("field").GetIterator(null);

                DocsEnum de = null;
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                    }
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                    de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE);
                    TestOne(de, aDocIDs);

                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                    de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE);
                    TestOne(de, bDocIDs);
                }

                w.Dispose();
                r.Dispose();
                dir.Dispose();
            }
        }
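TestOne is referenced above but not shown; a hedged sketch of the check it performs: walk the postings with a random mix of NextDoc() and Advance(), verifying each hit against the expected, ascending docID list:

        private void TestOne(DocsEnum docs, IList<int> expected)
        {
            int upto = -1;
            while (upto < expected.Count - 1)
            {
                if (Random.NextBoolean())
                {
                    // next
                    upto++;
                    Assert.AreEqual(expected[upto], docs.NextDoc());
                }
                else
                {
                    // advance straight to a random later hit; Advance(target)
                    // positions on the first docID >= target
                    upto = TestUtil.NextInt32(Random, upto + 1, expected.Count - 1);
                    Assert.AreEqual(expected[upto], docs.Advance(expected[upto]));
                }
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
        }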
Example #33
        public virtual void TestIntersectStartTerm()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());

            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "abd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "acd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bcd", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.GetTerms("field");

            Automaton         automaton = (new RegExp(".*d", RegExpSyntax.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te;

            // should seek to startTerm
            te = terms.Intersect(ca, new BytesRef("aad"));
            Assert.AreEqual("abd", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("acd", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should fail to find ceil label on second arc, rewind
            te = terms.Intersect(ca, new BytesRef("add"));
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should reach end
            te = terms.Intersect(ca, new BytesRef("bcd"));
            Assert.IsNull(te.Next());
            te = terms.Intersect(ca, new BytesRef("ddd"));
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestRandom()
        {
            Directory dir = NewDirectory();

            int             NUM_TERMS = AtLeast(20);
            ISet <BytesRef> terms     = new JCG.HashSet <BytesRef>();

            while (terms.Count < NUM_TERMS)
            {
                string s = TestUtil.RandomRealisticUnicodeString(Random);
                //final String s = TestUtil.RandomSimpleString(random);
                if (s.Length > 0)
                {
                    terms.Add(new BytesRef(s));
                }
            }
            BytesRef[] termsArray = terms.ToArray(/*new BytesRef[terms.Count]*/);
            Array.Sort(termsArray);

            int NUM_DOCS = AtLeast(100);

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            // Sometimes swap in codec that impls ord():
            if (Random.Next(10) == 7)
            {
                // Make sure terms index has ords:
                Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
                conf.SetCodec(codec);
            }

            RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

            int[][]     idToOrds      = new int[NUM_DOCS][];
            ISet <int?> ordsForDocSet = new JCG.HashSet <int?>();

            for (int id = 0; id < NUM_DOCS; id++)
            {
                Document doc = new Document();

                doc.Add(new Int32Field("id", id, Field.Store.NO));

                int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier);
                while (ordsForDocSet.Count < termCount)
                {
                    ordsForDocSet.Add(Random.Next(termsArray.Length));
                }
                int[] ordsForDoc = new int[termCount];
                int   upto       = 0;
                if (Verbose)
                {
                    Console.WriteLine("TEST: doc id=" + id);
                }
                foreach (int ord in ordsForDocSet)
                {
                    ordsForDoc[upto++] = ord;
                    Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
                    if (Verbose)
                    {
                        Console.WriteLine("  f=" + termsArray[ord].Utf8ToString());
                    }
                    doc.Add(field);
                }
                ordsForDocSet.Clear();
                Array.Sort(ordsForDoc);
                idToOrds[id] = ordsForDoc;
                w.AddDocument(doc);
            }

            DirectoryReader r = w.GetReader();

            w.Dispose();

            if (Verbose)
            {
                Console.WriteLine("TEST: reader=" + r);
            }

            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: sub=" + ctx.Reader);
                }
                Verify((AtomicReader)ctx.Reader, idToOrds, termsArray, null);
            }

            // Also test top-level reader: its enum does not support
            // ord, so this forces the OrdWrapper to run:
            if (Verbose)
            {
                Console.WriteLine("TEST: top reader");
            }
            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);

            Verify(slowR, idToOrds, termsArray, null);

            FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

            r.Dispose();
            dir.Dispose();
        }
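Verify is defined elsewhere; a hedged, condensed sketch of its core check, assuming the port's DocTermOrds API (the real helper also exercises prefixes and ord lookups; prefix handling is omitted here). With a null prefix, DocTermOrds enumerates every term in sorted order, so its ords line up with indexes into termsArray:

        private void Verify(AtomicReader reader, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefix)
        {
            // (prefix handling omitted in this sketch)
            DocTermOrds dto = new DocTermOrds(reader, reader.LiveDocs, "field");
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(reader, "id", false);
            SortedSetDocValues iter = dto.GetIterator(reader);
            for (int docID = 0; docID < reader.MaxDoc; docID++)
            {
                iter.SetDocument(docID);
                int[] expected = idToOrds[docIDToID.Get(docID)];
                int upto = 0;
                long ord;
                while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    Assert.AreEqual(expected[upto++], (int)ord);
                }
                Assert.AreEqual(expected.Length, upto);
            }
        }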
Example #35
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                //numTerms /= 2;
                // LUCENENET specific - To keep this under the 1 hour free limit
                // of Azure DevOps, this was reduced from /2 to /6.
                numTerms /= 6;
            }
            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i, CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList.Shuffle(Random);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + iw.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType(prototype.FieldType);

            if (Random.NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random.Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = IndexOptions.DOCS_AND_FREQS; // we don't actually need positions
                fieldType.StoreTermVectors = true;                        // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadJob[]    threads     = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random.Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.GetReader();

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);
            TermsEnum termsEnum = terms.GetEnumerator();

            while (termsEnum.MoveNext())
            {
                int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.TotalTermFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
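The ThreadAnonymousClass used above is defined elsewhere in the test class. A plausible sketch of the worker (class name and the exact batching policy are assumptions): wait on the starting gun, then drain the shared queue, packing up to maxTermsPerDoc terms into each document before indexing it.

        // Hypothetical reconstruction; the real worker also tracks per-term counts.
        private sealed class IndexerThreadSketch : ThreadJob
        {
            private readonly int maxTermsPerDoc;
            private readonly ConcurrentQueue<string> postings;
            private readonly RandomIndexWriter iw;
            private readonly CountdownEvent startingGun;
            private readonly Random random;
            private readonly Document document; // each thread owns its own Document/Field pair
            private readonly Field field;

            public IndexerThreadSketch(int maxTermsPerDoc, ConcurrentQueue<string> postings,
                RandomIndexWriter iw, CountdownEvent startingGun, Random random,
                Document document, Field field)
            {
                this.maxTermsPerDoc = maxTermsPerDoc;
                this.postings = postings;
                this.iw = iw;
                this.startingGun = startingGun;
                this.random = random;
                this.document = document;
                this.field = field;
            }

            public override void Run()
            {
                startingGun.Wait();
                StringBuilder text = new StringBuilder();
                while (postings.TryDequeue(out string term))
                {
                    text.Clear().Append(term);
                    // pull up to maxTermsPerDoc - 1 additional terms into this doc
                    int moreTerms = random.Next(maxTermsPerDoc - 1);
                    for (int i = 0; i < moreTerms && postings.TryDequeue(out term); i++)
                    {
                        text.Append(' ').Append(term);
                    }
                    field.SetStringValue(text.ToString());
                    iw.AddDocument(document);
                }
            }
        }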
Example #36
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }

            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountDownLatch startingGun = new CountDownLatch(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.countDown();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (!(iwc.Codec is Lucene3xCodec))
            {
                Assert.AreEqual(numTerms - 1, terms.Size());
            }
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  term_;

            while ((term_ = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(term_.Utf8ToString());
                Assert.AreEqual(value, termsEnum.DocFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Example #37
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
Example #38
        public virtual void TestArbitraryFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int NUM_DOCS = AtLeast(27);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + NUM_DOCS + " docs");
            }
            int[] fieldsPerDoc = new int[NUM_DOCS];
            int   baseCount    = 0;

            for (int docCount = 0; docCount < NUM_DOCS; docCount++)
            {
                int fieldCount = TestUtil.NextInt(Random(), 1, 17);
                fieldsPerDoc[docCount] = fieldCount - 1;

                int finalDocCount = docCount;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount);
                }

                int finalBaseCount = baseCount;
                baseCount += fieldCount - 1;

                w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount));
            }

            IndexReader r = w.Reader;

            w.Dispose();

            IndexSearcher s       = NewSearcher(r);
            int           counter = 0;

            for (int id = 0; id < NUM_DOCS; id++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                int      docID      = hits.ScoreDocs[0].Doc;
                Document doc        = s.Doc(docID);
                int      endCounter = counter + fieldsPerDoc[id];
                while (counter < endCounter)
                {
                    string name    = "f" + counter;
                    int    fieldID = counter % 10;

                    bool stored  = (counter & 1) == 0 || fieldID == 3;
                    bool binary  = fieldID == 3;
                    bool indexed = fieldID != 3;

                    string stringValue;
                    if (fieldID != 3 && fieldID != 9)
                    {
                        stringValue = "text " + counter;
                    }
                    else
                    {
                        stringValue = null;
                    }

                    // stored:
                    if (stored)
                    {
                        IIndexableField f = doc.GetField(name);
                        Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                        if (binary)
                        {
                            Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                            BytesRef b = f.GetBinaryValue();
                            Assert.IsNotNull(b);
                            Assert.AreEqual(10, b.Length);
                            for (int idx = 0; idx < 10; idx++)
                            {
                                Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]);
                            }
                        }
                        else
                        {
                            Debug.Assert(stringValue != null);
                            Assert.AreEqual(stringValue, f.GetStringValue());
                        }
                    }

                    if (indexed)
                    {
                        bool tv = counter % 2 == 1 && fieldID != 9;
                        if (tv)
                        {
                            Terms tfv = r.GetTermVectors(docID).GetTerms(name);
                            Assert.IsNotNull(tfv);
                            TermsEnum termsEnum = tfv.GetIterator(null);
                            Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq);
                            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq);
                            Assert.AreEqual(1, dpEnum.NextPosition());

                            Assert.AreEqual(new BytesRef("text"), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq);
                            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq);
                            Assert.AreEqual(0, dpEnum.NextPosition());

                            Assert.IsNull(termsEnum.Next());

                            // TODO: offsets
                        }
                        else
                        {
                            Fields vectors = r.GetTermVectors(docID);
                            Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null);
                        }

                        BooleanQuery bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST);
                        TopDocs hits2 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits2.TotalHits);
                        Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc);

                        bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST);
                        TopDocs hits3 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits3.TotalHits);
                        Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc);
                    }

                    counter++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
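The IterableAnonymousInnerClassHelper passed to AddDocument above is not shown on this page. The mechanism it demonstrates: AddDocument accepts any IEnumerable<IIndexableField>, so the test can synthesize fields lazily instead of building a Document. A greatly simplified sketch, not a drop-in replacement (the real helper also emits the binary, stored, and term-vector variants that the assertions above check):

        private sealed class SyntheticDocSketch : IEnumerable<IIndexableField>
        {
            private readonly int fieldCount, docID, baseCount;

            public SyntheticDocSketch(int fieldCount, int docID, int baseCount)
            {
                this.fieldCount = fieldCount;
                this.docID = docID;
                this.baseCount = baseCount;
            }

            public IEnumerator<IIndexableField> GetEnumerator()
            {
                // the first field carries the doc id; the rest are numbered f<N> globally
                yield return new StringField("id", "" + docID, Field.Store.YES);
                for (int i = 1; i < fieldCount; i++)
                {
                    int counter = baseCount + i - 1;
                    yield return new TextField("f" + counter, "text " + counter, Field.Store.NO);
                }
            }

            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() => GetEnumerator();
        }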
        public virtual void TestSimple()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            Field field = NewTextField("foo", "", Field.Store.NO);
            doc.Add(field);
            Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
            doc.Add(dvField);
            Field field2 = NewTextField("bar", "", Field.Store.NO);
            doc.Add(field2);

            field.StringValue = "quick brown fox";
            field2.StringValue = "quick brown fox";
            dvField.FloatValue = 2f; // boost x2
            iw.AddDocument(doc);
            field.StringValue = "jumps over lazy brown dog";
            field2.StringValue = "jumps over lazy brown dog";
            dvField.FloatValue = 4f; // boost x4
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();

            // no boosting
            IndexSearcher searcher1 = NewSearcher(ir, false);
            Similarity @base = searcher1.Similarity;
            // boosting
            IndexSearcher searcher2 = NewSearcher(ir, false);
            searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base);

            // in this case, we searched on field "foo". first document should have 2x the score.
            TermQuery tq = new TermQuery(new Term("foo", "quick"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            TopDocs noboost = searcher1.Search(tq, 10);
            TopDocs boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc));
            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON);

            // this query matches only the second document, which should have 4x the score.
            tq = new TermQuery(new Term("foo", "jumps"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            noboost = searcher1.Search(tq, 10);
            boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON);

            // search on field bar just for kicks; nothing should happen, since we set up
            // our sim provider to only use foo_boost for field foo.
            tq = new TermQuery(new Term("bar", "quick"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            noboost = searcher1.Search(tq, 10);
            boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON);

            ir.Dispose();
            dir.Dispose();
        }
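The PerFieldSimilarityWrapperAnonymousInnerClassHelper above is defined elsewhere; the doc-values-reading similarity it returns for "foo" is the interesting part and is omitted here. This sketch shows only the per-field dispatch (class and variable names are assumptions):

        private sealed class BoostWrapperSketch : PerFieldSimilarityWrapper
        {
            private readonly string boostedFieldName;
            private readonly Similarity boosted;   // e.g. a similarity that scales scores by the foo_boost doc value
            private readonly Similarity fallback;  // the searcher's default similarity

            public BoostWrapperSketch(string boostedFieldName, Similarity boosted, Similarity fallback)
            {
                this.boostedFieldName = boostedFieldName;
                this.boosted = boosted;
                this.fallback = fallback;
            }

            public override Similarity Get(string field)
            {
                // only the boosted field gets the doc-values-driven similarity
                return boostedFieldName.Equals(field, StringComparison.Ordinal) ? boosted : fallback;
            }
        }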
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int positionGap = Random().Next(1000);
            int offsetGap = Random().Next(1000);
            Analyzer @delegate = new MockAnalyzer(Random());
            Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
            Document doc = new Document();
            FieldType ft = new FieldType();
            ft.Indexed = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.Tokenized = true;
            ft.StoreTermVectors = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.Reader);
            Fields fields = reader.GetTermVectors(0);
            Terms terms = fields.Terms("f");
            TermsEnum te = terms.Iterator(null);
            Assert.AreEqual(new BytesRef("a"), te.Next());
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);
            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq());
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset());
            int endOffset = dpe.EndOffset();
            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
            Assert.AreEqual(null, te.Next());
            reader.Dispose();
            writer.Dispose();
            writer.w.Directory.Dispose();
        }
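The AnalyzerWrapperAnonymousInnerClassHelper2 is defined elsewhere; per LUCENE-5324 the point is that a wrapper may report different gaps than the analyzer it wraps. A sketch using 4.8-style signatures (older ports differ slightly in member accessibility):

        private sealed class GapWrapperSketch : AnalyzerWrapper
        {
            private readonly Analyzer @delegate;
            private readonly int positionGap, offsetGap;

            public GapWrapperSketch(ReuseStrategy strategy, Analyzer @delegate, int positionGap, int offsetGap)
                : base(strategy)
            {
                this.@delegate = @delegate;
                this.positionGap = positionGap;
                this.offsetGap = offsetGap;
            }

            protected override Analyzer GetWrappedAnalyzer(string fieldName) => @delegate;

            // report the randomized gaps instead of delegating to the wrapped analyzer
            public override int GetPositionIncrementGap(string fieldName) => positionGap;

            public override int GetOffsetGap(string fieldName) => offsetGap;
        }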
        public virtual void TestInfiniteValues()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO));
            doc.Add(new LongField("long", long.MinValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO));
            doc.Add(new LongField("long", long.MaxValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", 0.0, Field.Store.NO));
            doc.Add(new LongField("long", 0L, Field.Store.NO));
            writer.AddDocument(doc);

            foreach (double d in TestNumericUtils.DOUBLE_NANs)
            {
                doc = new Document();
                doc.Add(new DoubleField("double", d, Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            IndexSearcher s = NewSearcher(r);

            Query q = NumericRangeQuery.NewLongRange("long", null, null, true, true);
            TopDocs topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", null, null, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count");

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestPostings()
        {
            Directory dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);
            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);
            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);
            titleType.StoreTermVectors = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets = true;
            titleType.IndexOptions = IndexOptions();
            Field titleField = new Field("title", "", titleType);
            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);
            bodyType.StoreTermVectors = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets = true;
            bodyType.IndexOptions = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);
            doc.Add(bodyField);

            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                titleField.StringValue = FieldValue(1);
                bodyField.StringValue = FieldValue(3);
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // MockDirectoryWrapper runs CheckIndex on dispose
        }
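FieldValue and the Terms array used above live elsewhere in the test class. A plausible sketch (the vocabulary and counts are assumptions): build a space-separated value where each chosen term repeats up to maxTF times, so term frequencies stay small but non-trivial.

        private static readonly string[] TermsSketch = { "red", "green", "blue", "fox", "lazy" }; // hypothetical vocabulary

        private static string FieldValueSketch(Random random, int maxTF)
        {
            StringBuilder sb = new StringBuilder();
            int numTerms = random.Next(10);
            for (int i = 0; i < numTerms; i++)
            {
                string term = TermsSketch[random.Next(TermsSketch.Length)];
                int tf = 1 + random.Next(maxTF); // repeat between 1 and maxTF times
                for (int j = 0; j < tf; j++)
                {
                    sb.Append(term).Append(' ');
                }
            }
            return sb.ToString();
        }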
Example #43
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }

            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i, CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList.Shuffle(Random);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);

            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + iw.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadJob[]    threads     = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.GetReader();

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
#pragma warning disable 612, 618
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (!(iwc.Codec is Lucene3xCodec))
#pragma warning restore 612, 618
            {
                Assert.AreEqual(numTerms - 1, terms.Count);
            }
            TermsEnum termsEnum = terms.GetEnumerator();
            while (termsEnum.MoveNext())
            {
                int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.DocFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void TestIsCacheAble()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.AddDocument(new Document());
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));

            // not cacheable:
            AssertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test", "value"))), false);
            // returns default empty docidset, always cacheable:
            AssertDocIdSetCacheable(reader, NumericRangeFilter.NewIntRange("test", Convert.ToInt32(10000), Convert.ToInt32(-10000), true, true), true);
            // is cacheable:
            AssertDocIdSetCacheable(reader, FieldCacheRangeFilter.NewIntRange("test", Convert.ToInt32(10), Convert.ToInt32(20), true, true), true);
            // a FixedBitSet-based filter is always cacheable
            AssertDocIdSetCacheable(reader, new FilterAnonymousInnerClassHelper3(this), true);

            reader.Dispose();
            dir.Dispose();
        }
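FilterAnonymousInnerClassHelper3 is defined elsewhere; all that matters to the assertion is that its GetDocIdSet returns a FixedBitSet, which the caching machinery treats as cacheable. A sketch using the 4.8 signature (the older port takes Bits rather than IBits):

        private sealed class FixedBitSetFilterSketch : Filter
        {
            public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
            {
                return new FixedBitSet(context.AtomicReader.MaxDoc); // empty, but a concrete bit set
            }
        }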
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            // TODO we could actually add more fields implemented with different PFs
            // or, just put this test into the usual rotation?
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());
            Document doc = new Document();
            FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn this on for a cross-check
            docsOnlyType.StoreTermVectors = true;
            docsOnlyType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;

            FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn this on for a cross-check
            docsAndFreqsType.StoreTermVectors = true;
            docsAndFreqsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;

            FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn these on for a cross-check
            positionsType.StoreTermVectors = true;
            positionsType.StoreTermVectorPositions = true;
            positionsType.StoreTermVectorOffsets = true;
            positionsType.StoreTermVectorPayloads = true;
            FieldType offsetsType = new FieldType(positionsType);
            offsetsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            Field field1 = new Field("field1docs", "", docsOnlyType);
            Field field2 = new Field("field2freqs", "", docsAndFreqsType);
            Field field3 = new Field("field3positions", "", positionsType);
            Field field4 = new Field("field4offsets", "", offsetsType);
            Field field5 = new Field("field5payloadsFixed", "", positionsType);
            Field field6 = new Field("field6payloadsVariable", "", positionsType);
            Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
            Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
            doc.Add(field1);
            doc.Add(field2);
            doc.Add(field3);
            doc.Add(field4);
            doc.Add(field5);
            doc.Add(field6);
            doc.Add(field7);
            doc.Add(field8);
            for (int i = 0; i < MAXDOC; i++)
            {
                string stringValue = Convert.ToString(i) + " verycommon " + English.IntToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
                field1.StringValue = stringValue;
                field2.StringValue = stringValue;
                field3.StringValue = stringValue;
                field4.StringValue = stringValue;
                field5.StringValue = stringValue;
                field6.StringValue = stringValue;
                field7.StringValue = stringValue;
                field8.StringValue = stringValue;
                iw.AddDocument(doc);
            }
            iw.Dispose();
            Verify(dir);
            TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge
            iwc.SetOpenMode(OpenMode_e.APPEND);
            IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            iw2.ForceMerge(1);
            iw2.Dispose();
            Verify(dir);
            dir.Dispose();
        }
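The AnalyzerAnonymousInnerClassHelper above is defined elsewhere. Judging by the field names (field5payloadsFixed, field6payloadsVariable, ...), a plausible reconstruction appends a payload filter for payload fields, fixed-length or variable-length to match the name. A 4.8-style sketch (seeds, lengths, and the name-matching rule are assumptions; accessibility of CreateComponents differs in older ports):

        private sealed class PayloadAnalyzerSketch : Analyzer
        {
            public PayloadAnalyzerSketch() : base(Analyzer.PER_FIELD_REUSE_STRATEGY) { }

            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader);
                TokenStream stream = tokenizer;
                if (fieldName.Contains("payloadsFixed"))
                {
                    stream = new MockFixedLengthPayloadFilter(new Random(0), stream, 1); // fixed seed for the sketch
                }
                else if (fieldName.Contains("payloadsVariable"))
                {
                    stream = new MockVariableLengthPayloadFilter(new Random(0), stream);
                }
                return new TokenStreamComponents(tokenizer, stream);
            }
        }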
        public virtual void TestTermUTF16SortOrder()
        {
            Random            rnd    = Random();
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(rnd, dir);
            Document          d      = new Document();
            // Single segment
            Field f = NewStringField("f", "", Field.Store.NO);

            d.Add(f);
            char[]           chars    = new char[2];
            HashSet <string> allTerms = new HashSet <string>();

            int num = AtLeast(200);

            for (int i = 0; i < num; i++)
            {
                string s;
                if (rnd.NextBoolean())
                {
                    // Single char
                    if (rnd.NextBoolean())
                    {
                        // Above surrogates
                        chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff);
                    }
                    else
                    {
                        // Below surrogates
                        chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1);
                    }
                    s = new string(chars, 0, 1);
                }
                else
                {
                    // Surrogate pair
                    chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END);
                    Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END);
                    chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END);
                    s        = new string(chars, 0, 2);
                }
                allTerms.Add(s);
                f.StringValue = s;

                writer.AddDocument(d);

                if ((1 + i) % 42 == 0)
                {
                    writer.Commit();
                }
            }

            IndexReader r = writer.Reader;

            // Test each sub-segment
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                CheckTermsOrder(ctx.Reader, allTerms, false);
            }
            CheckTermsOrder(r, allTerms, true);

            // Test multi segment
            r.Dispose();

            writer.ForceMerge(1);

            // Test single segment
            r = writer.Reader;
            CheckTermsOrder(r, allTerms, true);
            r.Dispose();

            writer.Dispose();
            dir.Dispose();
        }
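GetInt isn't shown on this page; presumably it is just an inclusive-range helper along these lines:

        private static int GetInt(Random r, int start, int end)
        {
            return start + r.Next(1 + end - start);
        }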
Example #47
        public virtual void Test()
        {
            // update variables
            int commitPercent        = Random.Next(20);
            int softCommitPercent    = Random.Next(100); // what percent of the commits are soft
            int deletePercent        = Random.Next(50);
            int deleteByQueryPercent = Random.Next(25);
            int ndocs                = AtLeast(50);
            int nWriteThreads        = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5);
            int maxConcurrentCommits = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); // number of committers at a time... needed if we want to avoid commit errors due to exceeding the max

            bool tombstones = Random.NextBoolean();

            // query variables
            AtomicInt64 operations = new AtomicInt64(AtLeast(10000)); // number of query operations to perform in total

            int nReadThreads = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5);

            InitModel(ndocs);

            FieldType storedOnlyType = new FieldType();

            storedOnlyType.IsStored = true;

            if (Verbose)
            {
                Console.WriteLine("\n");
                Console.WriteLine("TEST: commitPercent=" + commitPercent);
                Console.WriteLine("TEST: softCommitPercent=" + softCommitPercent);
                Console.WriteLine("TEST: deletePercent=" + deletePercent);
                Console.WriteLine("TEST: deleteByQueryPercent=" + deleteByQueryPercent);
                Console.WriteLine("TEST: ndocs=" + ndocs);
                Console.WriteLine("TEST: nWriteThreads=" + nWriteThreads);
                Console.WriteLine("TEST: nReadThreads=" + nReadThreads);
                Console.WriteLine("TEST: maxConcurrentCommits=" + maxConcurrentCommits);
                Console.WriteLine("TEST: tombstones=" + tombstones);
                Console.WriteLine("TEST: operations=" + operations);
                Console.WriteLine("\n");
            }

            AtomicInt32 numCommitting = new AtomicInt32();

            IList <ThreadJob> threads = new List <ThreadJob>();

            Directory dir = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            writer.DoRandomForceMergeAssert = false;
            writer.Commit();
            reader = DirectoryReader.Open(dir);

            for (int i = 0; i < nWriteThreads; i++)
            {
                ThreadJob thread = new ThreadAnonymousClass(this, "WRITER" + i, commitPercent, softCommitPercent, deletePercent, deleteByQueryPercent, ndocs, maxConcurrentCommits, tombstones, operations, storedOnlyType, numCommitting, writer);

                threads.Add(thread);
            }

            for (int i = 0; i < nReadThreads; i++)
            {
                ThreadJob thread = new ThreadAnonymousClass2(this, "READER" + i, ndocs, tombstones, operations);

                threads.Add(thread);
            }

            foreach (ThreadJob thread in threads)
            {
                thread.Start();
            }

            foreach (ThreadJob thread in threads)
            {
                thread.Join();
            }

            writer.Dispose();
            if (Verbose)
            {
                Console.WriteLine("TEST: close reader=" + reader);
            }
            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestCachingWorks()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            AtomicReaderContext context = (AtomicReaderContext)reader.Context;
            MockFilter filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            DocIdSet strongRef = cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // make sure no exception if cache is holding the wrong docIdSet
            cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);

            // second time, nested filter should not be called
            filter.Clear();
            cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Dispose();
            dir.Dispose();
        }
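MockFilter is part of the test support code and not shown here. A minimal sketch of the behavior the assertions rely on (names match the calls above; the returned set is an assumption): record whether GetDocIdSet was invoked, so the test can prove the cache short-circuits the second call.

        private sealed class MockFilterSketch : Filter
        {
            private bool wasCalled;

            public bool WasCalled() => wasCalled;

            public void Clear() => wasCalled = false;

            public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
            {
                wasCalled = true; // the CachingWrapperFilter test asserts on this flag
                return new FixedBitSet(context.AtomicReader.MaxDoc);
            }
        }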