Example #1
0
        public void TestRandomChainsWithLargeStrings()
        {
            int    numIterations = AtLeast(20);
            Random random        = Random;

            for (int i = 0; i < numIterations; i++)
            {
                MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next());
                if (Verbose)
                {
                    Console.WriteLine("Creating random analyzer:" + a);
                }
                try
                {
                    CheckRandomData(random, a, 50 * RandomMultiplier, 128, false,
                                    false /* We already validate our own offsets... */);
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("Exception from random analyzer: " + a);
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }
        }
Example #2
0
        // constructor
        public PerfRunData(Config config)
        {
            this.config = config;
            // analyzer (default is standard analyzer)
            analyzer = NewAnalyzerTask.CreateAnalyzer(config.Get("analyzer",
                                                                 typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName));

            // content source
            string sourceClass = config.Get("content.source", typeof(SingleDocSource).AssemblyQualifiedName);

            contentSource = (ContentSource)Activator.CreateInstance(Type.GetType(sourceClass)); //Class.forName(sourceClass).asSubclass(typeof(ContentSource)).newInstance();
            contentSource.SetConfig(config);

            // doc maker
            docMaker = (DocMaker)Activator.CreateInstance(Type.GetType(config.Get("doc.maker", typeof(DocMaker).AssemblyQualifiedName)));  // "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
            docMaker.SetConfig(config, contentSource);
            // facet source
            facetSource = (FacetSource)Activator.CreateInstance(Type.GetType(config.Get("facet.source",
                                                                                        typeof(RandomFacetSource).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
            facetSource.SetConfig(config);
            // query makers
            readTaskQueryMaker = new Dictionary <Type, IQueryMaker>();
            qmkrClass          = Type.GetType(config.Get("query.maker", typeof(SimpleQueryMaker).AssemblyQualifiedName));

            // index stuff
            Reinit(false);

            // statistic points
            points = new Points(config);

            if (bool.Parse(config.Get("log.queries", "false")))
            {
                Console.WriteLine("------------> queries:");
                Console.WriteLine(GetQueryMaker(new SearchTask(this)).PrintQueries());
            }
        }
            public override void Warm(AtomicReader reader)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: now warm merged reader=" + reader);
                }
#if FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
                outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true);
#else
                outerInstance.warmed[((SegmentReader)reader).core] = true;
#endif
                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;
                int   sum      = 0;
                int   inc      = Math.Max(1, maxDoc / 50);
                for (int docID = 0; docID < maxDoc; docID += inc)
                {
                    if (liveDocs == null || liveDocs.Get(docID))
                    {
                        Document doc = reader.Document(docID);
                        sum += doc.Fields.Count;
                    }
                }
                IndexSearcher searcher =
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    outerInstance.
#endif
                    NewSearcher(reader);

                sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;

                if (Verbose)
                {
                    Console.WriteLine("TEST: warm visited " + sum + " fields");
                }
            }
Example #4
0
        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
        {
            int minSkipInterval;

            if (state.SegmentInfo.DocCount > 1000000)
            {
                // Test2BPostings can OOME otherwise:
                minSkipInterval = 3;
            }
            else
            {
                minSkipInterval = 2;
            }

            // we pull this before the seed intentionally: because its not consumed at runtime
            // (the skipInterval is written into postings header)
            int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10);

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
            }

            long seed = seedRandom.NextInt64();

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
            }

            string      seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
            IndexOutput @out         = state.Directory.CreateOutput(seedFileName, state.Context);

            try
            {
                @out.WriteInt64(seed);
            }
            finally
            {
                @out.Dispose();
            }

            Random random = new J2N.Randomizer(seed);

            random.Next(); // consume a random for buffersize

            PostingsWriterBase postingsWriter;

            if (random.nextBoolean())
            {
                postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: writing Standard postings");
                }
                // TODO: randomize variables like acceptibleOverHead?!
                postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
            }

            if (random.NextBoolean())
            {
                int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
                }
                postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
            }

            FieldsConsumer fields;
            int            t1 = random.Next(4);

            if (t1 == 0)
            {
                bool success = false;
                try
                {
                    fields  = new FSTTermsWriter(state, postingsWriter);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsWriter.Dispose();
                    }
                }
            }
            else if (t1 == 1)
            {
                bool success = false;
                try
                {
                    fields  = new FSTOrdTermsWriter(state, postingsWriter);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsWriter.Dispose();
                    }
                }
            }
            else if (t1 == 2)
            {
                // Use BlockTree terms dict

                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
                }

                // TODO: would be nice to allow 1 but this is very
                // slow to write
                int minTermsInBlock = TestUtil.NextInt32(random, 2, 100);
                int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.Next(100));

                bool success = false;
                try
                {
                    fields  = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsWriter.Dispose();
                    }
                }
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: writing Block terms dict");
                }

                bool success = false;

                TermsIndexWriterBase indexWriter;
                try
                {
                    if (random.NextBoolean())
                    {
                        state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100);
                        if (LuceneTestCase.Verbose)
                        {
                            Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
                        }
                        indexWriter = new FixedGapTermsIndexWriter(state);
                    }
                    else
                    {
                        VariableGapTermsIndexWriter.IndexTermSelector selector;
                        int n2 = random.Next(3);
                        if (n2 == 0)
                        {
                            int tii = TestUtil.NextInt32(random, 1, 100);
                            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                            if (LuceneTestCase.Verbose)
                            {
                                Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                            }
                        }
                        else if (n2 == 1)
                        {
                            int docFreqThresh = TestUtil.NextInt32(random, 2, 100);
                            int tii           = TestUtil.NextInt32(random, 1, 100);
                            selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                        }
                        else
                        {
                            long seed2 = random.NextInt64();
                            int  gap   = TestUtil.NextInt32(random, 2, 40);
                            if (LuceneTestCase.Verbose)
                            {
                                Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                            }
                            selector = new IndexTermSelectorAnonymousClass(seed2, gap);
                        }
                        indexWriter = new VariableGapTermsIndexWriter(state, selector);
                    }
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsWriter.Dispose();
                    }
                }

                success = false;
                try
                {
                    fields  = new BlockTermsWriter(indexWriter, state, postingsWriter);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            postingsWriter.Dispose();
                        }
                        finally
                        {
                            indexWriter.Dispose();
                        }
                    }
                }
            }

            return(fields);
        }
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (Verbose)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(false, () => "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
                }
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception /*t*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetIterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetIterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Example #6
0
        private void DoTestSeekDoesNotExist(Random r, int numField, IList <Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader)
        {
            IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>();

            if (Verbose)
            {
                Console.WriteLine("TEST: top random seeks");
            }

            {
                int num = AtLeast(100);
                for (int iter = 0; iter < num; iter++)
                {
                    // seek to random spot
                    string field = ("f" + r.Next(numField)).Intern();
                    Term   tx    = new Term(field, GetRandomString(r));

                    int spot = Array.BinarySearch(fieldTermsArray, tx);

                    if (spot < 0)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: non-exist seek to " + field + ":" + UnicodeUtil.ToHexString(tx.Text));
                        }

                        // term does not exist:
                        if (!tes.TryGetValue(field, out TermsEnum te))
                        {
                            te         = MultiFields.GetTerms(reader, field).GetEnumerator();
                            tes[field] = te;
                        }

                        if (Verbose)
                        {
                            Console.WriteLine("  got enum");
                        }

                        spot = -spot - 1;

                        if (spot == fieldTerms.Count || !fieldTerms[spot].Field.Equals(field, StringComparison.Ordinal))
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.END, te.SeekCeil(tx.Bytes));
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(tx.Bytes));

                            if (Verbose)
                            {
                                Console.WriteLine("  got term=" + UnicodeUtil.ToHexString(te.Term.Utf8ToString()));
                                Console.WriteLine("  exp term=" + UnicodeUtil.ToHexString(fieldTerms[spot].Text));
                            }

                            Assert.AreEqual(fieldTerms[spot].Bytes, te.Term);

                            // now .next() this many times:
                            int ct = TestUtil.NextInt32(r, 5, 100);
                            for (int i = 0; i < ct; i++)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: now next()");
                                }
                                if (1 + spot + i >= fieldTerms.Count)
                                {
                                    break;
                                }
                                Term term = fieldTerms[1 + spot + i];
                                if (!term.Field.Equals(field, StringComparison.Ordinal))
                                {
                                    Assert.IsFalse(te.MoveNext());
                                    break;
                                }
                                else
                                {
                                    Assert.IsTrue(te.MoveNext());
                                    BytesRef t = te.Term;

                                    if (Verbose)
                                    {
                                        Console.WriteLine("  got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString())));
                                        Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(term.Text.ToString()));
                                    }

                                    Assert.AreEqual(term.Bytes, t);
                                }
                            }
                        }
                    }
                }
            }
        }
Example #7
0
            // Look for seek type 3 ("pop"): if the delta from
            // prev -> current was replacing an S with an E,
            // we must now seek to beyond that E.  this seek
            // "finishes" the dance at this character
            // position.
            private bool DoPop()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  try pop");
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(newSuffixStart <= prevTerm.Length);
                    Debugging.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0);
                }

                if (prevTerm.Length > newSuffixStart && IsNonBMPChar(prevTerm.Bytes, newSuffixStart) && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart))
                {
                    // Seek type 2 -- put 0xFF at this position:
                    scratchTerm.Bytes[newSuffixStart] = 0xff;
                    scratchTerm.Length = newSuffixStart + 1;

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("    seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString());
                    }

                    // TODO: more efficient seek?  can we simply swap
                    // the enums?
                    outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true);

                    Term t2 = termEnum.Term();

                    // We could hit EOF or different field since this
                    // was a seek "forward":
                    if (t2 != null && t2.Field == internedFieldName)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text) + " " + t2.Bytes);
                        }

                        BytesRef b2 = t2.Bytes;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(b2.Offset == 0);
                        }

                        // Set newSuffixStart -- we can't use
                        // termEnum's since the above seek may have
                        // done no scanning (eg, term was precisely
                        // and index term, or, was in the term seek
                        // cache):
                        scratchTerm.CopyBytes(b2);
                        SetNewSuffixStart(prevTerm, scratchTerm);

                        return(true);
                    }
                    else if (newSuffixStart != 0 || scratchTerm.Length != 0)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=null (or next field)");
                        }
                        newSuffixStart     = 0;
                        scratchTerm.Length = 0;
                        return(true);
                    }
                }

                return(false);
            }
Example #8
0
        public virtual void TestChangeCodecAndMerge()
        {
            Directory dir = NewDirectory();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: make new index");
            }
            IndexWriterConfig iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                                            new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetCodec(new MockCodec());

            iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            IndexWriter writer = NewWriter(dir, iwconf);

            AddDocs(writer, 10);
            writer.Commit();
            AssertQuery(new Term("content", "aaa"), dir, 10);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: addDocs3");
            }
            AddDocs3(writer, 10);
            writer.Commit();
            writer.Dispose();

            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "aaa"), dir, 10);
            Codec codec = iwconf.Codec;

            iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                     .SetOpenMode(OpenMode.APPEND).SetCodec(codec);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setNoCFSRatio(0.0);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

            iwconf.SetCodec(new MockCodec2()); // uses standard for field content
            writer = NewWriter(dir, iwconf);
            // swap in new codec for currently written segments
            if (VERBOSE)
            {
                Console.WriteLine("TEST: add docs w/ Standard codec for content field");
            }
            AddDocs2(writer, 10);
            writer.Commit();
            codec = iwconf.Codec;
            Assert.AreEqual(30, writer.MaxDoc);
            AssertQuery(new Term("content", "bbb"), dir, 10);
            AssertQuery(new Term("content", "ccc"), dir, 10); ////
            AssertQuery(new Term("content", "aaa"), dir, 10);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: add more docs w/ new codec");
            }
            AddDocs2(writer, 10);
            writer.Commit();
            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "bbb"), dir, 20);
            AssertQuery(new Term("content", "aaa"), dir, 10);
            Assert.AreEqual(40, writer.MaxDoc);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now optimize");
            }
            writer.ForceMerge(1);
            Assert.AreEqual(40, writer.MaxDoc);
            writer.Dispose();
            AssertQuery(new Term("content", "ccc"), dir, 10);
            AssertQuery(new Term("content", "bbb"), dir, 20);
            AssertQuery(new Term("content", "aaa"), dir, 10);

            dir.Dispose();
        }
Example #9
0
        public virtual void TestRandom()
        {
            int num = AtLeast(2);

            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                Directory dir = NewDirectory();

                IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                // we can do this because we use NoMergePolicy (and dont merge to "nothing")
                w.KeepFullyDeletedSegments = true;

                IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >();
                ISet <int?>      deleted = new JCG.HashSet <int?>();
                IList <BytesRef> terms   = new List <BytesRef>();

                int numDocs            = TestUtil.NextInt32(Random, 1, 100 * RANDOM_MULTIPLIER);
                Documents.Document doc = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field id = NewStringField("id", "", Field.Store.NO);
                doc.Add(id);

                bool onlyUniqueTerms = Random.NextBoolean();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
                }
                ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();
                for (int i = 0; i < numDocs; i++)
                {
                    if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0)
                    {
                        // re-use existing term
                        BytesRef term = terms[Random.Next(terms.Count)];
                        docs[term].Add(i);
                        f.SetStringValue(term.Utf8ToString());
                    }
                    else
                    {
                        string   s    = TestUtil.RandomUnicodeString(Random, 10);
                        BytesRef term = new BytesRef(s);
                        if (!docs.TryGetValue(term, out IList <int?> docsTerm))
                        {
                            docs[term] = docsTerm = new List <int?>();
                        }
                        docsTerm.Add(i);
                        terms.Add(term);
                        uniqueTerms.Add(term);
                        f.SetStringValue(s);
                    }
                    id.SetStringValue("" + i);
                    w.AddDocument(doc);
                    if (Random.Next(4) == 1)
                    {
                        w.Commit();
                    }
                    if (i > 0 && Random.Next(20) == 1)
                    {
                        int delID = Random.Next(i);
                        deleted.Add(delID);
                        w.DeleteDocuments(new Term("id", "" + delID));
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: delete " + delID);
                        }
                    }
                }

                if (VERBOSE)
                {
                    List <BytesRef> termsList = new List <BytesRef>(uniqueTerms);
#pragma warning disable 612, 618
                    termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
                    Console.WriteLine("TEST: terms in UTF16 order:");
                    foreach (BytesRef b in termsList)
                    {
                        Console.WriteLine("  " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                        foreach (int docID in docs[b])
                        {
                            if (deleted.Contains(docID))
                            {
                                Console.WriteLine("    " + docID + " (deleted)");
                            }
                            else
                            {
                                Console.WriteLine("    " + docID);
                            }
                        }
                    }
                }

                IndexReader reader = w.GetReader();
                w.Dispose();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: reader=" + reader);
                }

                IBits liveDocs = MultiFields.GetLiveDocs(reader);
                foreach (int delDoc in deleted)
                {
                    Assert.IsFalse(liveDocs.Get(delDoc));
                }

                for (int i = 0; i < 100; i++)
                {
                    BytesRef term = terms[Random.Next(terms.Count)];
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);
                    }

                    DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE);
                    Assert.IsNotNull(docsEnum);

                    foreach (int docID in docs[term])
                    {
                        if (!deleted.Contains(docID))
                        {
                            Assert.AreEqual(docID, docsEnum.NextDoc());
                        }
                    }
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }

                reader.Dispose();
                dir.Dispose();
            }
        }
        /// <summary>
        /// Prints the filename and size of each file within a given compound file.
        /// Add the -extract flag to extract files to the current working directory.
        /// In order to make the extracted version of the index work, you have to copy
        /// the segments file from the compound index into the directory where the extracted files are stored. </summary>
        ///// <param name="args"> Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt; </param>
        public static void Main(string[] args)
        {
            string filename = null;
            bool   extract  = false;
            string dirImpl  = null;

            int j = 0;

            while (j < args.Length)
            {
                string arg = args[j];
                if ("-extract".Equals(arg, StringComparison.Ordinal))
                {
                    extract = true;
                }
                else if ("-dir-impl".Equals(arg, StringComparison.Ordinal))
                {
                    if (j == args.Length - 1)
                    {
                        // LUCENENET specific - our wrapper console shows the correct usage
                        throw new ArgumentException("ERROR: missing value for --directory-type option");
                        //Console.WriteLine("ERROR: missing value for -dir-impl option");
                        //Environment.Exit(1);
                    }
                    j++;
                    dirImpl = args[j];
                }
                else if (filename == null)
                {
                    filename = arg;
                }
                j++;
            }

            if (filename == null)
            {
                // LUCENENET specific - our wrapper console shows the correct usage
                throw new ArgumentException("ERROR: CFS-FILE is required");
                //Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
                //return;
            }

            Store.Directory       dir     = null;
            CompoundFileDirectory cfr     = null;
            IOContext             context = IOContext.READ;

            try
            {
                FileInfo file    = new FileInfo(filename);
                string   dirname = file.DirectoryName;
                filename = file.Name;
                if (dirImpl == null)
                {
                    dir = FSDirectory.Open(new DirectoryInfo(dirname));
                }
                else
                {
                    dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
                }

                cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

                string[] files = cfr.ListAll();
                ArrayUtil.TimSort(files); // sort the array of filename so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        using (IndexInput ii = cfr.OpenInput(files[i], context))
                        {
                            using (FileStream f = new FileStream(files[i], FileMode.Open, FileAccess.ReadWrite))
                            {
                                // read and write with a small buffer, which is more effective than reading byte by byte
                                byte[] buffer = new byte[1024];
                                int    chunk  = buffer.Length;
                                while (len > 0)
                                {
                                    int bufLen = (int)Math.Min(chunk, len);
                                    ii.ReadBytes(buffer, 0, bufLen);
                                    f.Write(buffer, 0, bufLen);
                                    len -= bufLen;
                                }
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.ToString());
                //Console.Write(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    if (dir != null)
                    {
                        dir.Dispose();
                    }
                    if (cfr != null)
                    {
                        cfr.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    //Console.Write(ioe.StackTrace);
                }
            }
        }
            public override void Run()
            {
                Document  doc        = new Document();
                FieldType customType = new FieldType(TextField.TYPE_STORED);

                customType.StoreTermVectors         = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets   = true;

                doc.Add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
                doc.Add(new NumericDocValuesField("dv", 5));

                int idUpto    = 0;
                int fullCount = 0;
                // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + timeToRunInMilliseconds; // LUCENENET specific: added the ability to change how much time to alot

                do
                {
                    try
                    {
                        writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc);
                        addCount++;
                    }
                    catch (Exception ioe) when(ioe.IsIOException())
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: expected exc:");
                            Console.WriteLine(ioe.StackTrace);
                        }
                        //System.out.println(Thread.currentThread().getName() + ": hit exc");
                        //ioConsole.WriteLine(e.StackTrace);
                        if (ioe.Message.StartsWith("fake disk full at", StringComparison.Ordinal) || ioe.Message.Equals("now failing on purpose", StringComparison.Ordinal))
                        {
                            diskFull = true;

                            try
                            {
                                Thread.Sleep(1);
                            }
                            catch (Exception ie) when(ie.IsInterruptedException())
                            {
                                throw new Util.ThreadInterruptedException(ie);
                            }

                            if (fullCount++ >= 5)
                            {
                                break;
                            }
                        }
                        else
                        {
                            if (noErrors)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected IOException:");
                                Console.WriteLine(ioe.StackTrace);
                                error = ioe;
                            }
                            break;
                        }
                    }
                    catch (Exception t) when(t.IsThrowable())
                    {
                        //Console.WriteLine(t.StackTrace);
                        if (noErrors)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected Throwable:");
                            Console.WriteLine(t.StackTrace);
                            error = t;
                        }
                        break;
                    }
                } while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
            public override void Run()
            {
                for (int iter = 0; iter < iters && !failed.Value; iter++)
                {
                    //final int x = Random().nextInt(5);
                    int x = Random.Next(3);
                    try
                    {
                        switch (x)
                        {
                        case 0:
                            rollbackLock.@Lock();
                            if (Verbose)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback");
                            }
                            try
                            {
                                writerRef.Value.Rollback();
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer");
                                }
                                writerRef.Value =
                                    new IndexWriter(d, NewIndexWriterConfig(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                                                        outerInstance,
#endif
                                                        TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                            }
                            finally
                            {
                                rollbackLock.Unlock();
                            }
                            break;

                        case 1:
                            commitLock.@Lock();
                            if (Verbose)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit");
                            }
                            try
                            {
                                if (Random.NextBoolean())
                                {
                                    writerRef.Value.PrepareCommit();
                                }
                                writerRef.Value.Commit();
                            }
                            catch (Exception ace) when(ace.IsAlreadyClosedException())
                            {
                                // ok
                            }
                            catch (NullReferenceException)     // LUCENENET TODO: - NullReferenceException must be allowed to propagate so we can defensively avoid it in .NET
                            {
                                // ok
                            }
                            finally
                            {
                                commitLock.Unlock();
                            }
                            break;

                        case 2:
                            if (Verbose)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add");
                            }
                            try
                            {
                                writerRef.Value.AddDocument(docs.NextDoc());
                            }
                            catch (Exception ace) when(ace.IsAlreadyClosedException())
                            {
                                // ok
                            }
                            catch (NullReferenceException)     // LUCENENET TODO: - NullReferenceException must be allowed to propagate so we can defensively avoid it in .NET
                            {
                                // ok
                            }
                            catch (Exception ae) when(ae.IsAssertionError())
                            {
                                // ok
                            }
                            break;
                        }
                    }
                    catch (Exception t) when(t.IsThrowable())
                    {
                        failed.Value = (true);
                        throw RuntimeException.Create(t);
                    }
                }
            }
        // Runs test, with multiple threads, using the specific
        // failure to trigger an IOException
        public virtual void TestMultipleThreadsFailure(Failure failure)
        {
            int NUM_THREADS = 3;

            for (int iter = 0; iter < 2; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                MockDirectoryWrapper dir    = NewMockDirectory();
                IndexWriter          writer = new IndexWriter(
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                    .SetMaxBufferedDocs(2)
                    .SetMergeScheduler(new ConcurrentMergeScheduler())
                    .SetMergePolicy(NewLogMergePolicy(4)));
                ((IConcurrentMergeScheduler)writer.Config.MergeScheduler).SetSuppressExceptions();

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, true, NewField);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                Thread.Sleep(10);

                dir.FailOn(failure);
                failure.SetDoFail();

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                    Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable");
                }

                bool success = false;
                try
                {
                    writer.Dispose(false);
                    success = true;
                }
                catch (Exception ioe) when(ioe.IsIOException())
                {
                    failure.ClearDoFail();
                    writer.Dispose(false);
                }
                if (Verbose)
                {
                    Console.WriteLine("TEST: success=" + success);
                }

                if (success)
                {
                    IndexReader reader  = DirectoryReader.Open(dir);
                    IBits       delDocs = MultiFields.GetLiveDocs(reader);
                    for (int j = 0; j < reader.MaxDoc; j++)
                    {
                        if (delDocs == null || !delDocs.Get(j))
                        {
                            reader.Document(j);
                            reader.GetTermVectors(j);
                        }
                    }
                    reader.Dispose();
                }

                dir.Dispose();
            }
        }
        public virtual void TestCloseWithThreads()
        {
            int NUM_THREADS   = 3;
            int numIterations = TestNightly ? 7 : 3;

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory   dir    = NewDirectory();
                IndexWriter writer = new IndexWriter(
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                    .SetMaxBufferedDocs(10)
                    .SetMergeScheduler(new ConcurrentMergeScheduler())
                    .SetMergePolicy(NewLogMergePolicy(4)));
                ((IConcurrentMergeScheduler)writer.Config.MergeScheduler).SetSuppressExceptions();

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, false, NewField)

                                 // LUCENENET NOTE - ConcurrentMergeScheduler
                                 // used to take too long for this test to index a single document
                                 // so, increased the time from 200 to 300 ms.
                                 // But it has now been restored to 200 ms like Lucene.
                    {
                        timeToRunInMilliseconds = 200
                    };
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                bool done = false;
                while (!done)
                {
                    Thread.Sleep(100);
                    for (int i = 0; i < NUM_THREADS; i++)
                    // only stop when at least one thread has added a doc
                    {
                        if (threads[i].addCount > 0)
                        {
                            done = true;
                            break;
                        }
                        else if (!threads[i].IsAlive)
                        {
                            Assert.Fail("thread failed before indexing a single document");
                        }
                    }
                }

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: now close");
                }
                writer.Dispose(false);

                // Make sure threads that are adding docs are not hung:
                for (int i = 0; i < NUM_THREADS; i++)
                {
                    // Without fix for LUCENE-1130: one of the
                    // threads will hang
                    threads[i].Join();
                    if (threads[i].IsAlive)
                    {
                        Assert.Fail("thread seems to be hung");
                    }
                }

                // Quick test to make sure index is not corrupt:
                IndexReader reader = DirectoryReader.Open(dir);
                DocsEnum    tdocs  = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
                int         count  = 0;
                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    count++;
                }
                Assert.IsTrue(count > 0);
                reader.Dispose();

                dir.Dispose();
            }
        }
Example #15
0
        [AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test fails on x86 on .NET Framework in Release mode only
#endif
        public virtual void TestBS2DisjunctionNextVsAdvance()
        {
            Directory         d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, d);
            int numDocs = AtLeast(300);

            for (int docUpto = 0; docUpto < numDocs; docUpto++)
            {
                string contents = "a";
                if (Random.Next(20) <= 16)
                {
                    contents += " b";
                }
                if (Random.Next(20) <= 8)
                {
                    contents += " c";
                }
                if (Random.Next(20) <= 4)
                {
                    contents += " d";
                }
                if (Random.Next(20) <= 2)
                {
                    contents += " e";
                }
                if (Random.Next(20) <= 1)
                {
                    contents += " f";
                }
                Document doc = new Document();
                doc.Add(new TextField("field", contents, Field.Store.NO));
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            w.Dispose();

            for (int iter = 0; iter < 10 * RandomMultiplier; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("iter=" + iter);
                }
                IList <string> terms = new List <string> {
                    "a", "b", "c", "d", "e", "f"
                };
                int numTerms = TestUtil.NextInt32(Random, 1, terms.Count);
                while (terms.Count > numTerms)
                {
                    terms.RemoveAt(Random.Next(terms.Count));
                }

                if (Verbose)
                {
                    Console.WriteLine("  terms=" + terms);
                }

                BooleanQuery q = new BooleanQuery();
                foreach (string term in terms)
                {
                    q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD));
                }

                Weight weight = s.CreateNormalizedWeight(q);

                Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null);

                // First pass: just use .NextDoc() to gather all hits
                IList <ScoreDoc> hits = new List <ScoreDoc>();
                while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore()));
                }

                if (Verbose)
                {
                    Console.WriteLine("  " + hits.Count + " hits");
                }

                // Now, randomly next/advance through the list and
                // verify exact match:
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    weight = s.CreateNormalizedWeight(q);
                    scorer = weight.GetScorer(s.m_leafContexts[0], null);

                    if (Verbose)
                    {
                        Console.WriteLine("  iter2=" + iter2);
                    }

                    int upto = -1;
                    while (upto < hits.Count)
                    {
                        int nextUpto;
                        int nextDoc;
                        int left = hits.Count - upto;
                        if (left == 1 || Random.nextBoolean())
                        {
                            // next
                            nextUpto = 1 + upto;
                            nextDoc  = scorer.NextDoc();
                        }
                        else
                        {
                            // advance
                            int inc = TestUtil.NextInt32(Random, 1, left - 1);
                            nextUpto = inc + upto;
                            nextDoc  = scorer.Advance(hits[nextUpto].Doc);
                        }

                        if (nextUpto == hits.Count)
                        {
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc);
                        }
                        else
                        {
                            ScoreDoc hit = hits[nextUpto];
                            Assert.AreEqual(hit.Doc, nextDoc);
                            // Test for precise float equality:
                            Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore());
                        }
                        upto = nextUpto;
                    }
                }
            }

            r.Dispose();
            d.Dispose();
        }
Example #16
0
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (Verbose)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (Verbose)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousClass2(scoreDocsInOrder, actualResult,
                                                                      topScoreDocCollector));
                    // Asserting bit set...
                    if (Verbose)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality);
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality);
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Example #17
0
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new JCG.HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random.NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i, CultureInfo.InvariantCulture);
                int      randomI  = Random.Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random, "id", id, Field.Store.NO));
                document.Add(NewTextField(Random, "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
                    docs[i].linkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs))
                        {
                            context.FromDocuments[linkValue] = fromDocs = new JCG.List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs))
                        {
                            context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List <RandomDoc>();
                        }

                        fromDocs.Add(docs[i]);
                        randomValueFromDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments))
                        {
                            context.ToDocuments[linkValue] = toDocuments = new JCG.List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs))
                        {
                            context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List <RandomDoc>();
                        }

                        toDocuments.Add(docs[i]);
                        randomValueToDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random.Next(10) == 4)
                {
                    w.Commit();
                }
                if (Verbose)
                {
                    Console.WriteLine("Added document[" + docs[i].id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
            IndexSearcher toSearcher   = NewSearcher(toWriter.GetReader());

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousClass3(fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousClass4(fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum  docsEnum  = null;
                            TermsEnum termsEnum = null;
                            JCG.SortedSet <BytesRef> joinValues =
                                new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.UnionWith(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetEnumerator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousClass5(toField, joinValueToJoinScores,
                                                                       docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousClass6(toField, joinValueToJoinScores,
                                                                   docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
Example #18
0
                public override CollectionStatistics CollectionStatistics(string field)
                {
                    // TODO: we could compute this on init and cache,
                    // since we are re-inited whenever any nodes have a
                    // new reader
                    long docCount         = 0;
                    long sumTotalTermFreq = 0;
                    long sumDocFreq       = 0;
                    long maxDoc           = 0;

                    for (int nodeID = 0; nodeID < nodeVersions.Length; nodeID++)
                    {
                        FieldAndShardVersion key = new FieldAndShardVersion(nodeID, nodeVersions[nodeID], field);
                        CollectionStatistics nodeStats;
                        if (nodeID == MyNodeID)
                        {
                            nodeStats = base.CollectionStatistics(field);
                        }
                        else
                        {
                            outerInstance.collectionStatsCache.TryGetValue(key, out nodeStats);
                        }
                        if (nodeStats is null)
                        {
                            Console.WriteLine("coll stats myNodeID=" + MyNodeID + ": " + Collections.ToString(outerInstance.collectionStatsCache.Keys));
                        }
                        // Collection stats are pre-shared on reopen, so,
                        // we better not have a cache miss:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(nodeStats != null, "myNodeID={0} nodeID={1} version={2} field={3}", MyNodeID, nodeID, nodeVersions[nodeID], field);
                        }

                        long nodeDocCount = nodeStats.DocCount;
                        if (docCount >= 0 && nodeDocCount >= 0)
                        {
                            docCount += nodeDocCount;
                        }
                        else
                        {
                            docCount = -1;
                        }

                        long nodeSumTotalTermFreq = nodeStats.SumTotalTermFreq;
                        if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0)
                        {
                            sumTotalTermFreq += nodeSumTotalTermFreq;
                        }
                        else
                        {
                            sumTotalTermFreq = -1;
                        }

                        long nodeSumDocFreq = nodeStats.SumDocFreq;
                        if (sumDocFreq >= 0 && nodeSumDocFreq >= 0)
                        {
                            sumDocFreq += nodeSumDocFreq;
                        }
                        else
                        {
                            sumDocFreq = -1;
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(nodeStats.MaxDoc >= 0);
                        }
                        maxDoc += nodeStats.MaxDoc;
                    }

                    return(new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq));
                }
Example #19
0
        public virtual void Test2BTerms_Mem()
        {
            if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                throw RuntimeException.Create("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            dir.CheckIndexOnDispose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(new ConcurrentMergeScheduler())
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(OpenMode.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random, TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;                                                          // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                savedTerms = ts.savedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Example #20
0
            public override void Run()
            {
                // TODO: would be better if this were cross thread, so that we make sure one thread deleting anothers added docs works:
                IList <string>  toDeleteIDs     = new List <string>();
                IList <SubDocs> toDeleteSubDocs = new List <SubDocs>();

                while (Environment.TickCount < stopTime && !outerInstance.m_failed)
                {
                    try
                    {
                        // Occasional longish pause if running
                        // nightly
                        if (LuceneTestCase.TestNightly && Random.Next(6) == 3)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep");
                            }
                            //Thread.Sleep(TestUtil.NextInt32(Random, 50, 500));
                            // LUCENENET specific - Reduced amount of pause to keep the total
                            // Nightly test time under 1 hour
                            Thread.Sleep(TestUtil.NextInt32(Random, 50, 250));
                        }

                        // Rate limit ingest rate:
                        if (Random.Next(7) == 5)
                        {
                            Thread.Sleep(TestUtil.NextInt32(Random, 1, 10));
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": done sleep");
                            }
                        }

                        Document doc = docs.NextDoc();
                        if (doc == null)
                        {
                            break;
                        }

                        // Maybe add randomly named field
                        string addedField;
                        if (Random.NextBoolean())
                        {
                            addedField = "extra" + Random.Next(40);
                            doc.Add(NewTextField(addedField, "a random field", Field.Store.YES));
                        }
                        else
                        {
                            addedField = null;
                        }

                        if (Random.NextBoolean())
                        {
                            if (Random.NextBoolean())
                            {
                                // Add/update doc block:
                                string  packID;
                                SubDocs delSubDocs;
                                if (toDeleteSubDocs.Count > 0 && Random.NextBoolean())
                                {
                                    delSubDocs = toDeleteSubDocs[Random.Next(toDeleteSubDocs.Count)];
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(!delSubDocs.Deleted);
                                    }
                                    toDeleteSubDocs.Remove(delSubDocs);
                                    // Update doc block, replacing prior packID
                                    packID = delSubDocs.PackID;
                                }
                                else
                                {
                                    delSubDocs = null;
                                    // Add doc block, using new packID
                                    packID = outerInstance.m_packCount.GetAndIncrement().ToString(CultureInfo.InvariantCulture);
                                }

                                Field            packIDField = NewStringField("packID", packID, Field.Store.YES);
                                IList <string>   docIDs      = new List <string>();
                                SubDocs          subDocs     = new SubDocs(packID, docIDs);
                                IList <Document> docsList    = new List <Document>();

                                allSubDocs.Enqueue(subDocs);
                                doc.Add(packIDField);
                                docsList.Add(TestUtil.CloneDocument(doc));
                                docIDs.Add(doc.Get("docid"));

                                int maxDocCount = TestUtil.NextInt32(Random, 1, 10);
                                while (docsList.Count < maxDocCount)
                                {
                                    doc = docs.NextDoc();
                                    if (doc == null)
                                    {
                                        break;
                                    }
                                    docsList.Add(TestUtil.CloneDocument(doc));
                                    docIDs.Add(doc.Get("docid"));
                                }
                                outerInstance.m_addCount.AddAndGet(docsList.Count);

                                Term packIDTerm = new Term("packID", packID);

                                if (delSubDocs != null)
                                {
                                    delSubDocs.Deleted = true;
                                    delIDs.UnionWith(delSubDocs.SubIDs);
                                    outerInstance.m_delCount.AddAndGet(delSubDocs.SubIDs.Count);
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.UpdateDocuments(packIDTerm, docsList);
                                }
                                else
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.AddDocuments(packIDTerm, docsList);
                                }
                                doc.RemoveField("packID");

                                if (Random.Next(5) == 2)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID);
                                    }
                                    toDeleteSubDocs.Add(subDocs);
                                }
                            }
                            else
                            {
                                // Add single doc
                                string docid = doc.Get("docid");
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid);
                                }
                                outerInstance.AddDocument(new Term("docid", docid), doc);
                                outerInstance.m_addCount.GetAndIncrement();

                                if (Random.Next(5) == 3)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                    }
                                    toDeleteIDs.Add(docid);
                                }
                            }
                        }
                        else
                        {
                            // Update single doc, but we never re-use
                            // and ID so the delete will never
                            // actually happen:
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + doc.Get("docid"));
                            }
                            string docid = doc.Get("docid");
                            outerInstance.UpdateDocument(new Term("docid", docid), doc);
                            outerInstance.m_addCount.GetAndIncrement();

                            if (Random.Next(5) == 3)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                }
                                toDeleteIDs.Add(docid);
                            }
                        }

                        if (Random.Next(30) == 17)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes");
                            }
                            foreach (string id in toDeleteIDs)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id);
                                }
                                outerInstance.DeleteDocuments(new Term("docid", id));
                            }
                            int count = outerInstance.m_delCount.AddAndGet(toDeleteIDs.Count);
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes");
                            }
                            delIDs.UnionWith(toDeleteIDs);
                            toDeleteIDs.Clear();

                            foreach (SubDocs subDocs in toDeleteSubDocs)
                            {
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(!subDocs.Deleted);
                                }
                                delPackIDs.Add(subDocs.PackID);
                                outerInstance.DeleteDocuments(new Term("packID", subDocs.PackID));
                                subDocs.Deleted = true;
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID);
                                }
                                delIDs.UnionWith(subDocs.SubIDs);
                                outerInstance.m_delCount.AddAndGet(subDocs.SubIDs.Count);
                            }
                            toDeleteSubDocs.Clear();
                        }
                        if (addedField != null)
                        {
                            doc.RemoveField(addedField);
                        }
                    }
                    catch (Exception t)
                    {
                        Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
                        Console.WriteLine(t.ToString());
                        Console.Write(t.StackTrace);
                        outerInstance.m_failed.Value = (true);
                        throw new Exception(t.ToString(), t);
                    }
                }
                if (Verbose)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing done");
                }

                outerInstance.DoAfterIndexingThreadDone();
            }
Example #21
0
            // Swap in S, in place of E:
            private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
            {
                int savLength = term.Length;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(term.Offset == 0);
                }

                // The 3 bytes starting at downTo make up 1
                // unicode character:
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IsHighBMPChar(term.Bytes, pos));
                }

                // NOTE: we cannot make this assert, because
                // AutomatonQuery legitimately sends us malformed UTF8
                // (eg the UTF8 bytes with just 0xee)
                // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

                // Save the bytes && length, since we need to
                // restore this if seek "back" finds no matching
                // terms
                if (term.Bytes.Length < 4 + pos)
                {
                    term.Grow(4 + pos);
                }

                scratch[0] = (sbyte)term.Bytes[pos];
                scratch[1] = (sbyte)term.Bytes[pos + 1];
                scratch[2] = (sbyte)term.Bytes[pos + 2];

                term.Bytes[pos]     = 0xf0;
                term.Bytes[pos + 1] = 0x90;
                term.Bytes[pos + 2] = 0x80;
                term.Bytes[pos + 3] = 0x80;
                term.Length         = 4 + pos;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }

                // Seek "back":
                outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

                // Test if the term we seek'd to in fact found a
                // surrogate pair at the same position as the E:
                Term t2 = te.Term();

                // Cannot be null (or move to next field) because at
                // "worst" it'd seek to the same term we are on now,
                // unless we are being called from seek
                if (t2 is null || t2.Field != internedFieldName)
                {
                    return(false);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text));
                }

                // Now test if prefix is identical and we found
                // a non-BMP char at the same position:
                BytesRef b2 = t2.Bytes;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(b2.Offset == 0);
                }

                bool matches;

                if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos))
                {
                    matches = true;
                    for (int i = 0; i < pos; i++)
                    {
                        if (term.Bytes[i] != b2.Bytes[i])
                        {
                            matches = false;
                            break;
                        }
                    }
                }
                else
                {
                    matches = false;
                }

                // Restore term:
                term.Length         = savLength;
                term.Bytes[pos]     = (byte)scratch[0];
                term.Bytes[pos + 1] = (byte)scratch[1];
                term.Bytes[pos + 2] = (byte)scratch[2];

                return(matches);
            }
Example #22
0
 public override void Run()
 {
     if (Verbose)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     string source;
                     diagnostics.TryGetValue("source", out source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetEnumerator();
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         if (!termsEnum.MoveNext())
                         {
                             totTermCount.Value = seenTermCount;
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Value = (true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }
Example #23
0
        // randomly seeks to term that we know exists, then next's
        // from there
        private void DoTestSeekExists(Random r, IList <Term> fieldTerms, IndexReader reader)
        {
            IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>();

            // Test random seek to existing term, then enum:
            if (Verbose)
            {
                Console.WriteLine("\nTEST: top now seek");
            }

            int num = AtLeast(100);

            for (int iter = 0; iter < num; iter++)
            {
                // pick random field+term
                int    spot  = r.Next(fieldTerms.Count);
                Term   term  = fieldTerms[spot];
                string field = term.Field;

                if (Verbose)
                {
                    Console.WriteLine("TEST: exist seek field=" + field + " term=" + UnicodeUtil.ToHexString(term.Text));
                }

                // seek to it
                if (!tes.TryGetValue(field, out TermsEnum te))
                {
                    te         = MultiFields.GetTerms(reader, field).GetEnumerator();
                    tes[field] = te;
                }

                if (Verbose)
                {
                    Console.WriteLine("  done get enum");
                }

                // seek should find the term
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(term.Bytes));

                // now .next() this many times:
                int ct = TestUtil.NextInt32(r, 5, 100);
                for (int i = 0; i < ct; i++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: now next()");
                    }
                    if (1 + spot + i >= fieldTerms.Count)
                    {
                        break;
                    }
                    term = fieldTerms[1 + spot + i];
                    if (!term.Field.Equals(field, StringComparison.Ordinal))
                    {
                        Assert.IsFalse(te.MoveNext());
                        break;
                    }
                    else
                    {
                        Assert.IsTrue(te.MoveNext());
                        BytesRef t = te.Term;

                        if (Verbose)
                        {
                            Console.WriteLine("  got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString())));
                            Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(term.Text.ToString()));
                        }

                        Assert.AreEqual(term.Bytes, t);
                    }
                }
            }
        }
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush      failure   = new FailOnlyOnFlush(this);

            directory.FailOn(failure);

            IndexWriter writer  = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2));
            Document    doc     = new Document();
            Field       idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.SetStringValue(Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.hitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs);
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);

            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Example #25
0
        public virtual void TestSurrogatesOrder()
        {
            Directory dir    = NewDirectory();
            var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            config.Codec = new PreFlexRWCodec();
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, config);

            int numField = TestUtil.NextInt32(Random, 2, 5);

            int uniqueTermCount = 0;

            int tc = 0;

            var fieldTerms = new JCG.List <Term>();

            for (int f = 0; f < numField; f++)
            {
                string field    = "f" + f;
                int    numTerms = AtLeast(200);

                ISet <string> uniqueTerms = new JCG.HashSet <string>();

                for (int i = 0; i < numTerms; i++)
                {
                    string term = GetRandomString(Random) + "_ " + (tc++);
                    uniqueTerms.Add(term);
                    fieldTerms.Add(new Term(field, term));
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewStringField(field, term, Field.Store.NO));
                    w.AddDocument(doc);
                }
                uniqueTermCount += uniqueTerms.Count;
            }

            IndexReader reader = w.GetReader();

            if (Verbose)
            {
                fieldTerms.Sort(termAsUTF16Comparer);

                Console.WriteLine("\nTEST: UTF16 order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            // sorts in code point order:
            fieldTerms.Sort();

            if (Verbose)
            {
                Console.WriteLine("\nTEST: codepoint order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            Term[] fieldTermsArray = fieldTerms.ToArray();

            //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);

            //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
            //Assert.IsNotNull(fields);

            DoTestStraightEnum(fieldTerms, reader, uniqueTermCount);
            DoTestSeekExists(Random, fieldTerms, reader);
            DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader);

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Example #26
0
        public virtual void TestRandom()
        {
            int alphabetSize = TestUtil.NextInt32(Random, 2, 7);

            int docLen = AtLeast(3000);
            //final int docLen = 50;

            string document = GetRandomString('a', alphabetSize, docLen);

            if (Verbose)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = AtLeast(5);
            //final int numSyn = 2;

            IDictionary <string, OneSyn> synMap = new Dictionary <string, OneSyn>();
            IList <OneSyn> syns  = new JCG.List <OneSyn>();
            bool           dedup = Random.nextBoolean();

            if (Verbose)
            {
                Console.WriteLine("  dedup=" + dedup);
            }
            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt32(Random, 1, 5)).Trim();
                if (!synMap.TryGetValue(synIn, out OneSyn s) || s is null)
                {
                    s     = new OneSyn();
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out        = new JCG.List <string>();
                    synMap[synIn] = s;
                    s.keepOrig    = Random.nextBoolean();
                }
                string synOut = GetRandomString('0', 10, TestUtil.NextInt32(Random, 1, 5)).Trim();
                [email protected](synOut);
                Add(synIn, synOut, s.keepOrig);
                if (Verbose)
                {
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
                }
            }

            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            assertTrue(tokensIn.IncrementToken());
            assertFalse(tokensIn.IncrementToken());
            tokensIn.End();
            tokensIn.Dispose();

            tokensOut  = new SynonymFilter(tokensIn, b.Build(), true);
            termAtt    = tokensOut.AddAttribute <ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute <IPositionIncrementAttribute>();
            posLenAtt  = tokensOut.AddAttribute <IPositionLengthAttribute>();
            offsetAtt  = tokensOut.AddAttribute <IOffsetAttribute>();

            if (dedup)
            {
                PruneDups(syns);
            }

            string expected = SlowSynMatcher(document, syns, 5);

            if (Verbose)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            Verify(document, expected);
        }
            public virtual void IndexDoc()
            {
                Document d = new Document();

                FieldType customType1 = new FieldType(TextField.TYPE_STORED);

                customType1.IsTokenized = false;
                customType1.OmitNorms   = true;

                List <Field> fields   = new List <Field>();
                string       idString = IdString;
                Field        idField  = NewField("id", idString, customType1);

                fields.Add(idField);

                int nFields = NextInt(maxFields);

                for (int i = 0; i < nFields; i++)
                {
                    FieldType customType = new FieldType();
                    switch (NextInt(4))
                    {
                    case 0:
                        break;

                    case 1:
                        customType.StoreTermVectors = true;
                        break;

                    case 2:
                        customType.StoreTermVectors         = true;
                        customType.StoreTermVectorPositions = true;
                        break;

                    case 3:
                        customType.StoreTermVectors       = true;
                        customType.StoreTermVectorOffsets = true;
                        break;
                    }

                    switch (NextInt(4))
                    {
                    case 0:
                        customType.IsStored  = true;
                        customType.OmitNorms = true;
                        customType.IsIndexed = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(1), customType));
                        break;

                    case 1:
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 2:
                        customType.IsStored                 = true;
                        customType.StoreTermVectors         = false;
                        customType.StoreTermVectorOffsets   = false;
                        customType.StoreTermVectorPositions = false;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 3:
                        customType.IsStored    = true;
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(bigFieldSize), customType));
                        break;
                    }
                }

                if (sameFieldOrder)
                {
                    fields.Sort(fieldNameComparer);
                }
                else
                {
                    // random placement of id field also
                    fields.Swap(NextInt(fields.Count), 0);
                }

                for (int i = 0; i < fields.Count; i++)
                {
                    d.Add(fields[i]);
                }
                if (Verbose)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString);
                }
                w.UpdateDocument(new Term("id", idString), d);
                //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
                docs[idString] = d;
            }
Example #28
0
        // For the output string: separate positions with a space,
        // and separate multiple tokens at each position with a
        // /.  If a token should have end offset != the input
        // token's end offset then add :X to it:

        // TODO: we should probably refactor this guy to use/take analyzer,
        // the tests are a little messy
        private void Verify(string input, string output)
        {
            if (Verbose)
            {
                Console.WriteLine("TEST: verify input=" + input + " expectedOutput=" + output);
            }

            tokensIn.SetReader(new StringReader(input));
            tokensOut.Reset();
            string[] expected     = output.Split(' ').TrimEnd();
            int      expectedUpto = 0;

            while (tokensOut.IncrementToken())
            {
                if (Verbose)
                {
                    Console.WriteLine("  incr token=" + termAtt.ToString() + " posIncr=" + posIncrAtt.PositionIncrement + " startOff=" + offsetAtt.StartOffset + " endOff=" + offsetAtt.EndOffset);
                }

                assertTrue(expectedUpto < expected.Length);
                int startOffset = offsetAtt.StartOffset;
                int endOffset   = offsetAtt.EndOffset;

                string[] expectedAtPos = expected[expectedUpto++].Split('/').TrimEnd();
                for (int atPos = 0; atPos < expectedAtPos.Length; atPos++)
                {
                    if (atPos > 0)
                    {
                        assertTrue(tokensOut.IncrementToken());
                        if (Verbose)
                        {
                            Console.WriteLine("  incr token=" + termAtt.ToString() + " posIncr=" + posIncrAtt.PositionIncrement + " startOff=" + offsetAtt.StartOffset + " endOff=" + offsetAtt.EndOffset);
                        }
                    }
                    int    colonIndex    = expectedAtPos[atPos].IndexOf(':');
                    int    underbarIndex = expectedAtPos[atPos].IndexOf('_');
                    string expectedToken;
                    int    expectedEndOffset;
                    int    expectedPosLen;
                    if (colonIndex != -1)
                    {
                        expectedToken = expectedAtPos[atPos].Substring(0, colonIndex - 0);
                        if (underbarIndex != -1)
                        {
                            expectedEndOffset = int.Parse(expectedAtPos[atPos].Substring(1 + colonIndex, underbarIndex - (1 + colonIndex)), CultureInfo.InvariantCulture);
                            expectedPosLen    = int.Parse(expectedAtPos[atPos].Substring(1 + underbarIndex), CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            expectedEndOffset = int.Parse(expectedAtPos[atPos].Substring(1 + colonIndex), CultureInfo.InvariantCulture);
                            expectedPosLen    = 1;
                        }
                    }
                    else
                    {
                        expectedToken     = expectedAtPos[atPos];
                        expectedEndOffset = endOffset;
                        expectedPosLen    = 1;
                    }
                    assertEquals(expectedToken, termAtt.ToString());
                    assertEquals(atPos == 0 ? 1 : 0, posIncrAtt.PositionIncrement);
                    // start/end offset of all tokens at same pos should
                    // be the same:
                    assertEquals(startOffset, offsetAtt.StartOffset);
                    assertEquals(expectedEndOffset, offsetAtt.EndOffset);
                    assertEquals(expectedPosLen, posLenAtt.PositionLength);
                }
            }
            tokensOut.End();
            tokensOut.Dispose();
            if (Verbose)
            {
                Console.WriteLine("  incr: END");
            }
            assertEquals(expectedUpto, expected.Length);
        }
Example #29
0
        public override FieldsProducer FieldsProducer(SegmentReadState state)
        {
            string     seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
            IndexInput @in          = state.Directory.OpenInput(seedFileName, state.Context);
            long       seed         = @in.ReadInt64();

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
            }
            @in.Dispose();

            Random random = new J2N.Randomizer(seed);

            int readBufferSize = TestUtil.NextInt32(random, 1, 4096);

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
            }

            PostingsReaderBase postingsReader;

            if (random.NextBoolean())
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Sep postings");
                }
                postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo,
                                                       state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Standard postings");
                }
                postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
            }

            if (random.NextBoolean())
            {
                int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
                }
                postingsReader = new PulsingPostingsReader(state, postingsReader);
            }

            FieldsProducer fields;
            int            t1 = random.Next(4);

            if (t1 == 0)
            {
                bool success = false;
                try
                {
                    fields  = new FSTTermsReader(state, postingsReader);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else if (t1 == 1)
            {
                bool success = false;
                try
                {
                    fields  = new FSTOrdTermsReader(state, postingsReader);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else if (t1 == 2)
            {
                // Use BlockTree terms dict
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
                }

                bool success = false;
                try
                {
                    fields = new BlockTreeTermsReader(state.Directory,
                                                      state.FieldInfos,
                                                      state.SegmentInfo,
                                                      postingsReader,
                                                      state.Context,
                                                      state.SegmentSuffix,
                                                      state.TermsIndexDivisor);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Block terms dict");
                }
                TermsIndexReaderBase indexReader;
                bool success = false;
                try
                {
                    bool doFixedGap = random.NextBoolean();

                    // randomness diverges from writer, here:
                    if (state.TermsIndexDivisor != -1)
                    {
                        state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
                    }

                    if (doFixedGap)
                    {
                        // if termsIndexDivisor is set to -1, we should not touch it. It means a
                        // test explicitly instructed not to load the terms index.
                        if (LuceneTestCase.Verbose)
                        {
                            Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                        }
                        indexReader = new FixedGapTermsIndexReader(state.Directory,
                                                                   state.FieldInfos,
                                                                   state.SegmentInfo.Name,
                                                                   state.TermsIndexDivisor,
                                                                   BytesRef.UTF8SortedAsUnicodeComparer,
                                                                   state.SegmentSuffix, state.Context);
                    }
                    else
                    {
                        int n2 = random.Next(3);
                        if (n2 == 1)
                        {
                            random.Next();
                        }
                        else if (n2 == 2)
                        {
                            random.NextInt64();
                        }
                        if (LuceneTestCase.Verbose)
                        {
                            Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                        }
                        indexReader = new VariableGapTermsIndexReader(state.Directory,
                                                                      state.FieldInfos,
                                                                      state.SegmentInfo.Name,
                                                                      state.TermsIndexDivisor,
                                                                      state.SegmentSuffix, state.Context);
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }

                success = false;
                try
                {
                    fields = new BlockTermsReader(indexReader,
                                                  state.Directory,
                                                  state.FieldInfos,
                                                  state.SegmentInfo,
                                                  postingsReader,
                                                  state.Context,
                                                  state.SegmentSuffix);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            postingsReader.Dispose();
                        }
                        finally
                        {
                            indexReader.Dispose();
                        }
                    }
                }
            }

            return(fields);
        }
        /*
         * public void testWikipedia()  {
         * final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml");
         * final Reader r = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
         *
         * final long startTimeNS = System.nanoTime();
         * boolean done = false;
         * long compoundCount = 0;
         * long nonCompoundCount = 0;
         * long netOffset = 0;
         * while (!done) {
         *  final TokenStream ts = analyzer.tokenStream("ignored", r);
         *  ts.reset();
         *  final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
         *  final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
         *  int count = 0;
         *  while (true) {
         *    if (!ts.incrementToken()) {
         *      done = true;
         *      break;
         *    }
         *    count++;
         *    if (posIncAtt.getPositionIncrement() == 0) {
         *      compoundCount++;
         *    } else {
         *      nonCompoundCount++;
         *      if (nonCompoundCount % 1000000 == 0) {
         *        System.out.println(String.format("%.2f msec [pos=%d, %d, %d]",
         *                                         (System.nanoTime()-startTimeNS)/1000000.0,
         *                                         netOffset + offsetAtt.startOffset(),
         *                                         nonCompoundCount,
         *                                         compoundCount));
         *      }
         *    }
         *    if (count == 100000000) {
         *      System.out.println("  again...");
         *      break;
         *    }
         *  }
         *  ts.end();
         *  netOffset += offsetAtt.endOffset();
         * }
         * System.out.println("compoundCount=" + compoundCount + " nonCompoundCount=" + nonCompoundCount);
         * r.close();
         * }
         */


        private void doTestBocchan(int numIterations)
        {
            TextReader reader = new StreamReader(
                this.GetType().getResourceAsStream("bocchan.utf-8"), Encoding.UTF8);
            String line = reader.ReadLine();

            reader.Dispose();

            if (Verbose)
            {
                Console.WriteLine("Test for Bocchan without pre-splitting sentences");
            }

            /*
             * if (numIterations > 1) {
             * // warmup
             * for (int i = 0; i < numIterations; i++) {
             *  final TokenStream ts = analyzer.tokenStream("ignored", line);
             *  ts.reset();
             *  while(ts.incrementToken());
             * }
             * }
             */

            long totalStart = Environment.TickCount;

            for (int i = 0; i < numIterations; i++)
            {
                TokenStream ts = analyzer.GetTokenStream("ignored", line);
                try
                {
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        ;
                    }
                    ts.End();
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(ts);
                }
            }
            String[] sentences = Regex.Split(line, "、|。").TrimEnd();
            if (Verbose)
            {
                Console.WriteLine("Total time : " + (Environment.TickCount - totalStart));
                Console.WriteLine("Test for Bocchan with pre-splitting sentences (" + sentences.Length + " sentences)");
            }
            totalStart = Environment.TickCount;
            for (int i = 0; i < numIterations; i++)
            {
                foreach (String sentence in sentences)
                {
                    TokenStream ts = analyzer.GetTokenStream("ignored", sentence);
                    try
                    {
                        ts.Reset();
                        while (ts.IncrementToken())
                        {
                            ;
                        }
                        ts.End();
                    }
                    finally
                    {
                        IOUtils.DisposeWhileHandlingException(ts);
                    }
                }
            }
            if (Verbose)
            {
                Console.WriteLine("Total time : " + (Environment.TickCount - totalStart));
            }
        }