Beispiel #1
0
        public override PostingsFormat GetPostingsFormatForField(string name)
        {
            PostingsFormat codec;

            if (!previousMappings.TryGetValue(name, out codec) || codec == null)
            {
                codec = formats[Math.Abs(perFieldSeed ^ name.GetHashCode()) % formats.Count];
                if (codec is SimpleTextPostingsFormat && perFieldSeed % 5 != 0)
                {
                    // make simpletext rarer, choose again
                    codec = formats[Math.Abs(perFieldSeed ^ name.ToUpperInvariant().GetHashCode()) % formats.Count];
                }
                previousMappings[name] = codec;
                // Safety:
                Debug.Assert(previousMappings.Count < 10000, "test went insane");
            }

            //if (LuceneTestCase.VERBOSE)
            //{
            Console.WriteLine("RandomCodec.GetPostingsFormatForField(\"" + name + "\") returned '" + codec.Name + "' with underlying type '" + codec.GetType().ToString() + "'.");
            //}

            return(codec);
        }
Beispiel #2
0
        public void TestRandomChains_()
        {
            int    numIterations = AtLeast(20);
            Random random        = Random;

            for (int i = 0; i < numIterations; i++)
            {
                MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next());
                if (VERBOSE)
                {
                    Console.WriteLine("Creating random analyzer:" + a);
                }
                try
                {
                    CheckRandomData(random, a, 500 * RANDOM_MULTIPLIER, 20, false,
                                    false /* We already validate our own offsets... */);
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("Exception from random analyzer: " + a);
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }
        }
        public virtual void TestBooleanSpanQuery()
        {
            int       hits            = 0;
            Directory directory       = NewDirectory();
            Analyzer  indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter       writer = new IndexWriter(directory, config);
            string            FIELD  = "content";
            Document          d      = new Document();

            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader   indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher    = NewSearcher(indexReader);

            DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
            SpanQuery           sq1   = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery           sq2   = new SpanTermQuery(new Term(FIELD, "clckwork"));

            query.Add(sq1);
            query.Add(sq2);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

            searcher.Search(query, collector);
            hits = collector.GetTopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.GetTopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(hits, 1);
            directory.Dispose();
        }
Beispiel #4
0
            public override bool IncrementToken()
            {
                ClearAttributes();
                if (TokenCount >= TokensPerDoc)
                {
                    return(false);
                }
                int shift = 32;

                for (int i = 0; i < 5; i++)
                {
                    Bytes.Bytes[i] = unchecked ((byte)((TermCounter >> shift) & 0xFF));
                    shift         -= 8;
                }
                TermCounter++;
                TokenCount++;
                if (--NextSave == 0)
                {
                    SavedTerms.Add(BytesRef.DeepCopyOf(Bytes));
                    Console.WriteLine("TEST: save term=" + Bytes);
                    NextSave = TestUtil.NextInt(Random, 500000, 1000000);
                }
                return(true);
            }
        public void TestRandomChainsWithLargeStrings()
        {
            int    numIterations = AtLeast(20);
            Random random        = Random();

            for (int i = 0; i < numIterations; i++)
            {
                MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next());
                if (VERBOSE)
                {
                    Console.WriteLine("Creating random analyzer:" + a);
                }
                try
                {
                    CheckRandomData(random, a, 50 * RANDOM_MULTIPLIER, 128, false,
                                    false /* We already validate our own offsets... */);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Exception from random analyzer: " + a);
                    throw e;
                }
            }
        }
            public override void Warm(AtomicReader reader)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: now warm merged reader=" + reader);
                }
#if FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE
                outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true);
#else
                outerInstance.warmed[((SegmentReader)reader).core] = true;
#endif
                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;
                int   sum      = 0;
                int   inc      = Math.Max(1, maxDoc / 50);
                for (int docID = 0; docID < maxDoc; docID += inc)
                {
                    if (liveDocs == null || liveDocs.Get(docID))
                    {
                        Document doc = reader.Document(docID);
                        sum += doc.Fields.Count;
                    }
                }
                IndexSearcher searcher =
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    outerInstance.
#endif
                    NewSearcher(reader);

                sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits;

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: warm visited " + sum + " fields");
                }
            }
Beispiel #7
0
            public override void Run()
            {
                for (int iter = 0; iter < Iters && !Failed.Get(); iter++)
                {
                    //final int x = Random().nextInt(5);
                    int x = Random.Next(3);
                    try
                    {
                        switch (x)
                        {
                        case 0:
                            RollbackLock.@Lock();
                            if (VERBOSE)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback");
                            }
                            try
                            {
                                WriterRef.Value.Rollback();
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer");
                                }
                                WriterRef.Value =
                                    new IndexWriter(d, OuterInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                            }
                            finally
                            {
                                RollbackLock.Unlock();
                            }
                            break;

                        case 1:
                            CommitLock.@Lock();
                            if (VERBOSE)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit");
                            }
                            try
                            {
                                if (Random.NextBoolean())
                                {
                                    WriterRef.Value.PrepareCommit();
                                }
                                WriterRef.Value.Commit();
                            }
                            catch (ObjectDisposedException)
                            {
                                // ok
                            }
                            catch (NullReferenceException)
                            {
                                // ok
                            }
                            finally
                            {
                                CommitLock.Unlock();
                            }
                            break;

                        case 2:
                            if (VERBOSE)
                            {
                                Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add");
                            }
                            try
                            {
                                WriterRef.Value.AddDocument(Docs.NextDoc());
                            }
                            catch (ObjectDisposedException)
                            {
                                // ok
                            }
                            catch (System.NullReferenceException)
                            {
                                // ok
                            }
                            catch (InvalidOperationException)
                            {
                                // ok
                            }
                            break;
                        }
                    }
                    catch (Exception t)
                    {
                        Failed.Set(true);
                        throw new Exception(t.Message, t);
                    }
                }
            }
Beispiel #8
0
        public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            int NUM_THREADS   = 3;
            int numIterations = TEST_NIGHTLY ? 7 : 3;

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory dir    = NewDirectory();
                var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                   .SetMaxBufferedDocs(10)
                                   .SetMergeScheduler(newScheduler())
                                   .SetMergePolicy(NewLogMergePolicy(4));
                IndexWriter writer    = new IndexWriter(dir, config);
                var         scheduler = config.mergeScheduler as IConcurrentMergeScheduler;
                if (scheduler != null)
                {
                    scheduler.SetSuppressExceptions();
                }

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, false, NewField)

                                 // LUCENENET NOTE - ConcurrentMergeScheduler
                                 // used to take too long for this test to index a single document
                                 // so, increased the time from 200 to 300 ms.
                                 // But it has now been restored to 200 ms like Lucene.
                    {
                        TimeToRunInMilliseconds = 200
                    };
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                bool done = false;
                while (!done)
                {
                    Thread.Sleep(100);
                    for (int i = 0; i < NUM_THREADS; i++)
                    // only stop when at least one thread has added a doc
                    {
                        if (threads[i].AddCount > 0)
                        {
                            done = true;
                            break;
                        }
                        else if (!threads[i].IsAlive)
                        {
                            Assert.Fail("thread failed before indexing a single document");
                        }
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: now close");
                }
                writer.Dispose(false);

                // Make sure threads that are adding docs are not hung:
                for (int i = 0; i < NUM_THREADS; i++)
                {
                    // Without fix for LUCENE-1130: one of the
                    // threads will hang
                    threads[i].Join();
                    if (threads[i].IsAlive)
                    {
                        Assert.Fail("thread seems to be hung");
                    }
                }

                // Quick test to make sure index is not corrupt:
                IndexReader reader = DirectoryReader.Open(dir);
                DocsEnum    tdocs  = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
                int         count  = 0;
                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    count++;
                }
                Assert.IsTrue(count > 0);
                reader.Dispose();

                dir.Dispose();
            }
        }
Beispiel #9
0
        public virtual void TestParsingAndSearching()
        {
            string field = "content";

            string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" };

            // queries that should find all docs
            Query[] matchAll = new Query[] { new WildcardQuery(new Term(field, "*")), new WildcardQuery(new Term(field, "*1")), new WildcardQuery(new Term(field, "**1")), new WildcardQuery(new Term(field, "*?")), new WildcardQuery(new Term(field, "*?1")), new WildcardQuery(new Term(field, "?*1")), new WildcardQuery(new Term(field, "**")), new WildcardQuery(new Term(field, "***")), new WildcardQuery(new Term(field, "\\\\*")) };

            // queries that should find no docs
            Query[] matchNone = new Query[] { new WildcardQuery(new Term(field, "a*h")), new WildcardQuery(new Term(field, "a?h")), new WildcardQuery(new Term(field, "*a*h")), new WildcardQuery(new Term(field, "?a")), new WildcardQuery(new Term(field, "a?")) };

            PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][] { new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) }, new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) } };

            WildcardQuery[][] matchOneDocWild = new WildcardQuery[][] { new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) }, new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) } };

            // prepare the index
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, docs[i], Field.Store.NO));
                iw.AddDocument(doc);
            }
            iw.Dispose();

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            // test queries that must find all
            foreach (Query q in matchAll)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(docs.Length, hits.Length);
            }

            // test queries that must find none
            foreach (Query q in matchNone)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(0, hits.Length);
            }

            // thest the prefi queries find only one doc
            for (int i = 0; i < matchOneDocPrefix.Length; i++)
            {
                for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
                {
                    Query q = matchOneDocPrefix[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            // test the wildcard queries find only one doc
            for (int i = 0; i < matchOneDocWild.Length; i++)
            {
                for (int j = 0; j < matchOneDocWild[i].Length; j++)
                {
                    Query q = matchOneDocWild[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            reader.Dispose();
            dir.Dispose();
        }
Beispiel #10
0
        public virtual void Test()
        {
            Random       random   = new Random(Random.Next());
            LineFileDocs docs     = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            Directory    d        = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                LuceneTestCase.Random, d, analyzer);
            int numDocs = AtLeast(10);

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            List <BytesRef> terms     = new List <BytesRef>();
            TermsEnum       termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null);
            BytesRef        term;

            while ((term = termsEnum.Next()) != null)
            {
                terms.Add(BytesRef.DeepCopyOf(term));
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto  = -1;
            int iters = AtLeast(200);

            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                {
                    // next
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.Next() == null;
                    upto++;
                    if (isEnd)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    BytesRef target;
                    string   exists;
                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        // likely fake term
                        if (LuceneTestCase.Random.NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[LuceneTestCase.Random.Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term);
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term);
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
Beispiel #11
0
            public override SeekStatus SeekCeil(BytesRef term)
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }
                skipNext = false;
                TermInfosReader tis = outerInstance.TermsDict;
                Term            t0  = new Term(fieldInfo.Name, term);

                Debug.Assert(termEnum != null);

                tis.SeekEnum(termEnum, t0, false);

                Term t = termEnum.Term();

                if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes))
                {
                    // If we found an exact match, no need to do the
                    // surrogate dance
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek exact match");
                    }
                    current = t.Bytes;
                    return(SeekStatus.FOUND);
                }
                else if (t == null || t.Field != internedFieldName)
                {
                    // TODO: maybe we can handle this like the next()
                    // into null?  set term as prevTerm then dance?

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit EOF");
                    }

                    // We hit EOF; try end-case surrogate dance: if we
                    // find an E, try swapping in S, backwards:
                    scratchTerm.CopyBytes(term);

                    Debug.Assert(scratchTerm.Offset == 0);

                    for (int i = scratchTerm.Length - 1; i >= 0; i--)
                    {
                        if (IsHighBMPChar(scratchTerm.Bytes, i))
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("    found E pos=" + i + "; try seek");
                            }

                            if (SeekToNonBMP(seekTermEnum, scratchTerm, i))
                            {
                                scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
                                outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

                                newSuffixStart = 1 + i;

                                DoPushes();

                                // Found a match
                                // TODO: faster seek?
                                current = termEnum.Term().Bytes;
                                return(SeekStatus.NOT_FOUND);
                            }
                        }
                    }

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek END");
                    }

                    current = null;
                    return(SeekStatus.END);
                }
                else
                {
                    // We found a non-exact but non-null term; this one
                    // is fun -- just treat it like next, by pretending
                    // requested term was prev:
                    prevTerm.CopyBytes(term);

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
                    }

                    BytesRef br = t.Bytes;
                    Debug.Assert(br.Offset == 0);

                    SetNewSuffixStart(term, br);

                    SurrogateDance();

                    Term t2 = termEnum.Term();
                    if (t2 == null || t2.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        current = null;
                        return(SeekStatus.END);
                    }
                    else
                    {
                        current = t2.Bytes;
                        Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
                        return(SeekStatus.NOT_FOUND);
                    }
                }
            }
Beispiel #12
0
            // Pre-flex indices store terms in UTF16 sort order, but
            // certain queries require Unicode codepoint order; this
            // method carefully seeks around surrogates to handle
            // this impedance mismatch

            private void SurrogateDance()
            {
                if (!unicodeSortOrder)
                {
                    return;
                }

                // We are invoked after TIS.next() (by UTF16 order) to
                // possibly seek to a different "next" (by unicode
                // order) term.

                // We scan only the "delta" from the last term to the
                // current term, in UTF8 bytes.  We look at 1) the bytes
                // stripped from the prior term, and then 2) the bytes
                // appended to that prior term's prefix.

                // We don't care about specific UTF8 sequences, just
                // the "category" of the UTF16 character.  Category S
                // is a high/low surrogate pair (it non-BMP).
                // Category E is any BMP char > UNI_SUR_LOW_END (and <
                // U+FFFF). Category A is the rest (any unicode char
                // <= UNI_SUR_HIGH_START).

                // The core issue is that pre-flex indices sort the
                // characters as ASE, while flex must sort as AES.  So
                // when scanning, when we hit S, we must 1) seek
                // forward to E and enum the terms there, then 2) seek
                // back to S and enum all terms there, then 3) seek to
                // after E.  Three different seek points (1, 2, 3).

                // We can easily detect S in UTF8: if a byte has
                // prefix 11110 (0xf0), then that byte and the
                // following 3 bytes encode a single unicode codepoint
                // in S.  Similarly, we can detect E: if a byte has
                // prefix 1110111 (0xee), then that byte and the
                // following 2 bytes encode a single unicode codepoint
                // in E.

                // Note that this is really a recursive process --
                // maybe the char at pos 2 needs to dance, but any
                // point in its dance, suddenly pos 4 needs to dance
                // so you must finish pos 4 before returning to pos
                // 2.  But then during pos 4's dance maybe pos 7 needs
                // to dance, etc.  However, despite being recursive,
                // we don't need to hold any state because the state
                // can always be derived by looking at prior term &
                // current term.

                // TODO: can we avoid this copy?
                if (termEnum.Term() == null || termEnum.Term().Field != internedFieldName)
                {
                    scratchTerm.Length = 0;
                }
                else
                {
                    scratchTerm.CopyBytes(termEnum.Term().Bytes);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  dance");
                    Console.WriteLine("    prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToString()));
                    Console.WriteLine("         " + prevTerm.ToString());
                    Console.WriteLine("    term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()));
                    Console.WriteLine("         " + scratchTerm.ToString());
                }

                // this code assumes TermInfosReader/SegmentTermEnum
                // always use BytesRef.offset == 0
                Debug.Assert(prevTerm.Offset == 0);
                Debug.Assert(scratchTerm.Offset == 0);

                // Need to loop here because we may need to do multiple
                // pops, and possibly a continue in the end, ie:
                //
                //  cont
                //  pop, cont
                //  pop, pop, cont
                //  <nothing>
                //

                while (true)
                {
                    if (DoContinue())
                    {
                        break;
                    }
                    else
                    {
                        if (!DoPop())
                        {
                            break;
                        }
                    }
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  finish bmp ends");
                }

                DoPushes();
            }
Beispiel #13
0
        // FST is complete
        private void VerifyUnPruned(int inputMode, FST <T> fst)
        {
            FST <long?>  fstLong;
            ISet <long?> validOutputs;
            long         minLong = long.MaxValue;
            long         maxLong = long.MinValue;

            if (doReverseLookup)
            {
                FST <long?> fstLong0 = fst as FST <long?>;
                fstLong      = fstLong0;
                validOutputs = new HashSet <long?>();
                foreach (InputOutput <T> pair in pairs)
                {
                    long?output = pair.Output as long?;
                    maxLong = Math.Max(maxLong, output.Value);
                    minLong = Math.Min(minLong, output.Value);
                    validOutputs.Add(output.Value);
                }
            }
            else
            {
                fstLong      = null;
                validOutputs = null;
            }

            if (pairs.Count == 0)
            {
                Assert.IsNull(fst);
                return;
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify " + pairs.Count + " terms");
                foreach (InputOutput <T> pair in pairs)
                {
                    Assert.IsNotNull(pair);
                    Assert.IsNotNull(pair.Input);
                    Assert.IsNotNull(pair.Output);
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
                }
            }

            Assert.IsNotNull(fst);

            // visit valid pairs in order -- make sure all words
            // are accepted, and FSTEnum's next() steps through
            // them correctly
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check valid terms/next()");
            }
            {
                Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst);
                foreach (InputOutput <T> pair in pairs)
                {
                    Int32sRef term = pair.Input;
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output));
                    }
                    T output = Run(fst, term, null);
                    Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted");
                    Assert.IsTrue(OutputsEqual(pair.Output, output));

                    // verify enum's next
                    Int32sRefFSTEnum.InputOutput <T> t = fstEnum.Next();
                    Assert.IsNotNull(t);
                    Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, t.Output));
                }
                Assert.IsNull(fstEnum.Next());
            }

            IDictionary <Int32sRef, T> termsMap = new Dictionary <Int32sRef, T>();

            foreach (InputOutput <T> pair in pairs)
            {
                termsMap[pair.Input] = pair.Output;
            }

            if (doReverseLookup && maxLong > minLong)
            {
                // Do random lookups so we test null (output doesn't
                // exist) case:
                Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7));
                Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7));

                int num = LuceneTestCase.AtLeast(random, 100);
                for (int iter = 0; iter < num; iter++)
                {
                    long      v     = TestUtil.NextInt64(random, minLong, maxLong);
                    Int32sRef input = Util.GetByOutput(fstLong, v);
                    Assert.IsTrue(validOutputs.Contains(v) || input == null);
                }
            }

            // find random matching word and make sure it's valid
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify random accepted terms");
            }
            Int32sRef scratch = new Int32sRef(10);
            int       num_    = LuceneTestCase.AtLeast(random, 500);

            for (int iter = 0; iter < num_; iter++)
            {
                T output = RandomAcceptedWord(fst, scratch);
                Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid");
                Assert.IsTrue(OutputsEqual(termsMap[scratch], output));

                if (doReverseLookup)
                {
                    //System.out.println("lookup output=" + output + " outs=" + fst.Outputs);
                    Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value);
                    Assert.IsNotNull(input);
                    //System.out.println("  got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
                    Assert.AreEqual(scratch, input);
                }
            }

            // test IntsRefFSTEnum.Seek:
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify seek");
            }
            Int32sRefFSTEnum <T> fstEnum_ = new Int32sRefFSTEnum <T>(fst);

            num_ = LuceneTestCase.AtLeast(random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  iter=" + iter);
                }
                if (random.NextBoolean())
                {
                    // seek to term that doesn't exist:
                    while (true)
                    {
                        Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode);
                        int       pos  = pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                        if (pos < 0)
                        {
                            pos = -(pos + 1);
                            // ok doesn't exist
                            //System.out.println("  seek " + inputToString(inputMode, term));
                            Int32sRefFSTEnum.InputOutput <T> seekResult;
                            if (random.Next(3) == 0)
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekExact term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekExact(term);
                                pos        = -1;
                            }
                            else if (random.NextBoolean())
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekFloor term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekFloor(term);
                                pos--;
                            }
                            else
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekCeil term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekCeil(term);
                            }

                            if (pos != -1 && pos < pairs.Count)
                            {
                                //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output));
                                Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, pairs[pos].Input));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got " + InputToString(inputMode, seekResult.Input));
                                }
                                Assert.AreEqual(pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input));
                                Assert.IsTrue(OutputsEqual(pairs[pos].Output, seekResult.Output));
                            }
                            else
                            {
                                // seeked before start or beyond end
                                //System.out.println("seek=" + seekTerm);
                                Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input)));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got null");
                                }
                            }

                            break;
                        }
                    }
                }
                else
                {
                    // seek to term that does exist:
                    InputOutput <T> pair = pairs[random.Next(pairs.Count)];
                    Int32sRefFSTEnum.InputOutput <T> seekResult;
                    if (random.Next(3) == 2)
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekExact term=" + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekExact(pair.Input);
                    }
                    else if (random.NextBoolean())
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekFloor " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekFloor(pair.Input);
                    }
                    else
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekCeil " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekCeil(pair.Input);
                    }
                    Assert.IsNotNull(seekResult);
                    Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output));
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: mixed next/seek");
            }

            // test mixed next/seek
            num_ = LuceneTestCase.AtLeast(random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("TEST: iter " + iter);
                }
                // reset:
                fstEnum_ = new Int32sRefFSTEnum <T>(fst);
                int upto = -1;
                while (true)
                {
                    bool isDone = false;
                    if (upto == pairs.Count - 1 || random.NextBoolean())
                    {
                        // next
                        upto++;
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do next");
                        }
                        isDone = fstEnum_.Next() == null;
                    }
                    else if (upto != -1 && upto < 0.75 * pairs.Count && random.NextBoolean())
                    {
                        int attempt = 0;
                        for (; attempt < 10; attempt++)
                        {
                            Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode);
                            if (!termsMap.ContainsKey(term) && term.CompareTo(pairs[upto].Input) > 0)
                            {
                                int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                                Debug.Assert(pos < 0);
                                upto = -(pos + 1);

                                if (random.NextBoolean())
                                {
                                    upto--;
                                    Assert.IsTrue(upto != -1);
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekFloor(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekFloor(term) == null;
                                }
                                else
                                {
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekCeil(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekCeil(term) == null;
                                }

                                break;
                            }
                        }
                        if (attempt == 10)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        int inc = random.Next(pairs.Count - upto - 1);
                        upto += inc;
                        if (upto == -1)
                        {
                            upto = 0;
                        }

                        if (random.NextBoolean())
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekCeil(" + InputToString(inputMode, pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekCeil(pairs[upto].Input) == null;
                        }
                        else
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekFloor(" + InputToString(inputMode, pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekFloor(pairs[upto].Input) == null;
                        }
                    }
                    if (LuceneTestCase.VERBOSE)
                    {
                        if (!isDone)
                        {
                            Console.WriteLine("    got " + InputToString(inputMode, fstEnum_.Current.Input));
                        }
                        else
                        {
                            Console.WriteLine("    got null");
                        }
                    }

                    if (upto == pairs.Count)
                    {
                        Assert.IsTrue(isDone);
                        break;
                    }
                    else
                    {
                        Assert.IsFalse(isDone);
                        Assert.AreEqual(pairs[upto].Input, fstEnum_.Current.Input);
                        Assert.IsTrue(OutputsEqual(pairs[upto].Output, fstEnum_.Current.Output));

                        /*
                         * if (upto < pairs.size()-1) {
                         * int tryCount = 0;
                         * while(tryCount < 10) {
                         * final IntsRef t = toIntsRef(getRandomString(), inputMode);
                         * if (pairs.get(upto).input.compareTo(t) < 0) {
                         * final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
                         * if (LuceneTestCase.VERBOSE) {
                         * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
                         * }
                         * Assert.AreEqual(expected, fstEnum.beforeNext(t));
                         * break;
                         * }
                         * tryCount++;
                         * }
                         * }
                         */
                    }
                }
            }
        }
Beispiel #14
0
        internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
            }

            bool willRewrite = random.NextBoolean();

            Builder <T> builder = new Builder <T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
                                                  prune1, prune2,
                                                  prune1 == 0 && prune2 == 0,
                                                  allowRandomSuffixSharing ? random.NextBoolean() : true,
                                                  allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
                                                  outputs,
                                                  null,
                                                  willRewrite,
                                                  PackedInt32s.DEFAULT,
                                                  true,
                                                  15);

            if (LuceneTestCase.VERBOSE)
            {
                if (willRewrite)
                {
                    Console.WriteLine("TEST: packed FST");
                }
                else
                {
                    Console.WriteLine("TEST: non-packed FST");
                }
            }

            foreach (InputOutput <T> pair in pairs)
            {
                if (pair.Output is IEnumerable)
                {
                    Builder <object> builderObject = builder as Builder <object>;
                    var values = pair.Output as IEnumerable;
                    foreach (object value in values)
                    {
                        builderObject.Add(pair.Input, value);
                    }
                }
                else
                {
                    builder.Add(pair.Input, pair.Output);
                }
            }
            FST <T> fst = builder.Finish();

            if (random.NextBoolean() && fst != null && !willRewrite)
            {
                IOContext context = LuceneTestCase.NewIOContext(random);
                using (IndexOutput @out = dir.CreateOutput("fst.bin", context))
                {
                    fst.Save(@out);
                }
                IndexInput @in = dir.OpenInput("fst.bin", context);
                try
                {
                    fst = new FST <T>(@in, outputs);
                }
                finally
                {
                    @in.Dispose();
                    dir.DeleteFile("fst.bin");
                }
            }

            if (LuceneTestCase.VERBOSE && pairs.Count <= 20 && fst != null)
            {
                using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
                {
                    Util.ToDot(fst, w, false, false);
                }
                Console.WriteLine("SAVED out.dot");
            }

            if (LuceneTestCase.VERBOSE)
            {
                if (fst == null)
                {
                    Console.WriteLine("  fst has 0 nodes (fully pruned)");
                }
                else
                {
                    Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
                }
            }

            if (prune1 == 0 && prune2 == 0)
            {
                VerifyUnPruned(inputMode, fst);
            }
            else
            {
                VerifyPruned(inputMode, fst, prune1, prune2);
            }

            return(fst);
        }
 public override void Run()
 {
     if (VERBOSE)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     string source;
                     diagnostics.TryGetValue("source", out source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetIterator(null);
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount.Get() < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount.Get() / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         BytesRef term = termsEnum.Next();
                         if (term == null)
                         {
                             totTermCount.Set(seenTermCount);
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Set(true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }
Beispiel #16
0
            // Swap in S, in place of E:
            private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
            {
                int savLength = term.Length;

                Debug.Assert(term.Offset == 0);

                // The 3 bytes starting at downTo make up 1
                // unicode character:
                Debug.Assert(IsHighBMPChar(term.Bytes, pos));

                // NOTE: we cannot make this assert, because
                // AutomatonQuery legitimately sends us malformed UTF8
                // (eg the UTF8 bytes with just 0xee)
                // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

                // Save the bytes && length, since we need to
                // restore this if seek "back" finds no matching
                // terms
                if (term.Bytes.Length < 4 + pos)
                {
                    term.Grow(4 + pos);
                }

                scratch[0] = (sbyte)term.Bytes[pos];
                scratch[1] = (sbyte)term.Bytes[pos + 1];
                scratch[2] = (sbyte)term.Bytes[pos + 2];

                term.Bytes[pos]     = 0xf0;
                term.Bytes[pos + 1] = 0x90;
                term.Bytes[pos + 2] = 0x80;
                term.Bytes[pos + 3] = 0x80;
                term.Length         = 4 + pos;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }

                // Seek "back":
                outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

                // Test if the term we seek'd to in fact found a
                // surrogate pair at the same position as the E:
                Term t2 = te.Term();

                // Cannot be null (or move to next field) because at
                // "worst" it'd seek to the same term we are on now,
                // unless we are being called from seek
                if (t2 == null || t2.Field != internedFieldName)
                {
                    return(false);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text()));
                }

                // Now test if prefix is identical and we found
                // a non-BMP char at the same position:
                BytesRef b2 = t2.Bytes;

                Debug.Assert(b2.Offset == 0);

                bool matches;

                if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos))
                {
                    matches = true;
                    for (int i = 0; i < pos; i++)
                    {
                        if (term.Bytes[i] != b2.Bytes[i])
                        {
                            matches = false;
                            break;
                        }
                    }
                }
                else
                {
                    matches = false;
                }

                // Restore term:
                term.Length         = savLength;
                term.Bytes[pos]     = (byte)scratch[0];
                term.Bytes[pos + 1] = (byte)scratch[1];
                term.Bytes[pos + 2] = (byte)scratch[2];

                return(matches);
            }
Beispiel #17
0
            // Look for seek type 3 ("pop"): if the delta from
            // prev -> current was replacing an S with an E,
            // we must now seek to beyond that E.  this seek
            // "finishes" the dance at this character
            // position.
            private bool DoPop()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  try pop");
                }

                Debug.Assert(newSuffixStart <= prevTerm.Length);
                Debug.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0);

                if (prevTerm.Length > newSuffixStart && IsNonBMPChar(prevTerm.Bytes, newSuffixStart) && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart))
                {
                    // Seek type 2 -- put 0xFF at this position:
                    scratchTerm.Bytes[newSuffixStart] = 0xff;
                    scratchTerm.Length = newSuffixStart + 1;

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("    seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString());
                    }

                    // TODO: more efficient seek?  can we simply swap
                    // the enums?
                    outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true);

                    Term t2 = termEnum.Term();

                    // We could hit EOF or different field since this
                    // was a seek "forward":
                    if (t2 != null && t2.Field == internedFieldName)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + t2.Bytes);
                        }

                        BytesRef b2 = t2.Bytes;
                        Debug.Assert(b2.Offset == 0);

                        // Set newSuffixStart -- we can't use
                        // termEnum's since the above seek may have
                        // done no scanning (eg, term was precisely
                        // and index term, or, was in the term seek
                        // cache):
                        scratchTerm.CopyBytes(b2);
                        SetNewSuffixStart(prevTerm, scratchTerm);

                        return(true);
                    }
                    else if (newSuffixStart != 0 || scratchTerm.Length != 0)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=null (or next field)");
                        }
                        newSuffixStart     = 0;
                        scratchTerm.Length = 0;
                        return(true);
                    }
                }

                return(false);
            }
Beispiel #18
0
        // FST is pruned
        private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs);
                foreach (InputOutput <T> pair in pairs)
                {
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
                }
            }

            // To validate the FST, we brute-force compute all prefixes
            // in the terms, matched to their "common" outputs, prune that
            // set according to the prune thresholds, then assert the FST
            // matches that same set.

            // NOTE: Crazy RAM intensive!!

            //System.out.println("TEST: tally prefixes");

            // build all prefixes
            IDictionary <Int32sRef, CountMinOutput <T> > prefixes = new HashMap <Int32sRef, CountMinOutput <T> >();
            Int32sRef scratch = new Int32sRef(10);

            foreach (InputOutput <T> pair in pairs)
            {
                scratch.CopyInt32s(pair.Input);
                for (int idx = 0; idx <= pair.Input.Length; idx++)
                {
                    scratch.Length = idx;
                    CountMinOutput <T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                    if (cmo == null)
                    {
                        cmo        = new CountMinOutput <T>();
                        cmo.Count  = 1;
                        cmo.Output = pair.Output;
                        prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
                    }
                    else
                    {
                        cmo.Count++;
                        T output1 = cmo.Output;
                        if (output1.Equals(outputs.NoOutput))
                        {
                            output1 = outputs.NoOutput;
                        }
                        T output2 = pair.Output;
                        if (output2.Equals(outputs.NoOutput))
                        {
                            output2 = outputs.NoOutput;
                        }
                        cmo.Output = outputs.Common(output1, output2);
                    }
                    if (idx == pair.Input.Length)
                    {
                        cmo.IsFinal     = true;
                        cmo.FinalOutput = cmo.Output;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now prune");
            }


            // prune 'em
            // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since
            // in .NET you cannot delete records while enumerating forward through a dictionary.
            for (int i = prefixes.Count - 1; i >= 0; i--)
            {
                KeyValuePair <Int32sRef, CountMinOutput <T> > ent = prefixes.ElementAt(i);
                Int32sRef          prefix = ent.Key;
                CountMinOutput <T> cmo    = ent.Value;
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
                }
                bool keep;
                if (prune1 > 0)
                {
                    keep = cmo.Count >= prune1;
                }
                else
                {
                    Debug.Assert(prune2 > 0);
                    if (prune2 > 1 && cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else if (prefix.Length > 0)
                    {
                        // consult our parent
                        scratch.Length = prefix.Length - 1;
                        Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
                        CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                        //System.out.println("    parent count = " + (cmo2 == null ? -1 : cmo2.count));
                        keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
                    }
                    else if (cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else
                    {
                        keep = false;
                    }
                }

                if (!keep)
                {
                    prefixes.Remove(prefix);
                    //System.out.println("    remove");
                }
                else
                {
                    // clear isLeaf for all ancestors
                    //System.out.println("    keep");
                    scratch.CopyInt32s(prefix);
                    scratch.Length--;
                    while (scratch.Length >= 0)
                    {
                        CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                        if (cmo2 != null)
                        {
                            //System.out.println("    clear isLeaf " + inputToString(inputMode, scratch));
                            cmo2.IsLeaf = false;
                        }
                        scratch.Length--;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: after prune");
                foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
                {
                    Console.WriteLine("  " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
                    if (ent.Value.IsFinal)
                    {
                        Console.WriteLine("    finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput));
                    }
                }
            }

            if (prefixes.Count <= 1)
            {
                Assert.IsNull(fst);
                return;
            }

            Assert.IsNotNull(fst);

            // make sure FST only enums valid prefixes
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check pruned enum");
            }
            Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst);

            Int32sRefFSTEnum.InputOutput <T> current;
            while ((current = fstEnum.Next()) != null)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output));
                }
                CountMinOutput <T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null;
                Assert.IsNotNull(cmo);
                Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
                //if (cmo.isFinal && !cmo.isLeaf) {
                if (cmo.IsFinal)
                {
                    Assert.AreEqual(cmo.FinalOutput, current.Output);
                }
                else
                {
                    Assert.AreEqual(cmo.Output, current.Output);
                }
            }

            // make sure all non-pruned prefixes are present in the FST
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify all prefixes");
            }
            int[] stopNode = new int[1];
            foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
            {
                if (ent.Key.Length > 0)
                {
                    CountMinOutput <T> cmo = ent.Value;
                    T output = Run(fst, ent.Key, stopNode);
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output));
                    }
                    // if (cmo.isFinal && !cmo.isLeaf) {
                    if (cmo.IsFinal)
                    {
                        Assert.AreEqual(cmo.FinalOutput, output);
                    }
                    else
                    {
                        Assert.AreEqual(cmo.Output, output);
                    }
                    Assert.AreEqual(ent.Key.Length, stopNode[0]);
                }
            }
        }
Beispiel #19
0
            // Look for seek type 1 ("push"): if the newly added
            // suffix contains any S, we must try to seek to the
            // corresponding E.  If we find a match, we go there;
            // else we keep looking for additional S's in the new
            // suffix.  this "starts" the dance, at this character
            // position:
            private void DoPushes()
            {
                int upTo = newSuffixStart;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.Length);
                }

                while (upTo < scratchTerm.Length)
                {
                    if (IsNonBMPChar(scratchTerm.Bytes, upTo) && (upTo > newSuffixStart || (upTo >= prevTerm.Length || (!IsNonBMPChar(prevTerm.Bytes, upTo) && !IsHighBMPChar(prevTerm.Bytes, upTo)))))
                    {
                        // A non-BMP char (4 bytes UTF8) starts here:
                        Debug.Assert(scratchTerm.Length >= upTo + 4);

                        int savLength = scratchTerm.Length;
                        scratch[0] = (sbyte)scratchTerm.Bytes[upTo];
                        scratch[1] = (sbyte)scratchTerm.Bytes[upTo + 1];
                        scratch[2] = (sbyte)scratchTerm.Bytes[upTo + 2];

                        scratchTerm.Bytes[upTo]     = (byte)UTF8_HIGH_BMP_LEAD;
                        scratchTerm.Bytes[upTo + 1] = 0x80;
                        scratchTerm.Bytes[upTo + 2] = 0x80;
                        scratchTerm.Length          = upTo + 3;

                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("    try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length);
                        }

                        // Seek "forward":
                        // TODO: more efficient seek?
                        outerInstance.TermsDict.SeekEnum(seekTermEnum, new Term(fieldInfo.Name, scratchTerm), true);

                        scratchTerm.Bytes[upTo]     = (byte)scratch[0];
                        scratchTerm.Bytes[upTo + 1] = (byte)scratch[1];
                        scratchTerm.Bytes[upTo + 2] = (byte)scratch[2];
                        scratchTerm.Length          = savLength;

                        // Did we find a match?
                        Term t2 = seekTermEnum.Term();

                        if (DEBUG_SURROGATES)
                        {
                            if (t2 == null)
                            {
                                Console.WriteLine("      hit term=null");
                            }
                            else
                            {
                                Console.WriteLine("      hit term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + (t2 == null ? null : t2.Bytes));
                            }
                        }

                        // Since this was a seek "forward", we could hit
                        // EOF or a different field:
                        bool matches;

                        if (t2 != null && t2.Field == internedFieldName)
                        {
                            BytesRef b2 = t2.Bytes;
                            Debug.Assert(b2.Offset == 0);
                            if (b2.Length >= upTo + 3 && IsHighBMPChar(b2.Bytes, upTo))
                            {
                                matches = true;
                                for (int i = 0; i < upTo; i++)
                                {
                                    if (scratchTerm.Bytes[i] != b2.Bytes[i])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }
                            }
                            else
                            {
                                matches = false;
                            }
                        }
                        else
                        {
                            matches = false;
                        }

                        if (matches)
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("      matches!");
                            }

                            // OK seek "back"
                            // TODO: more efficient seek?
                            outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), true);

                            scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);

                            // +3 because we don't need to check the char
                            // at upTo: we know it's > BMP
                            upTo += 3;

                            // NOTE: we keep iterating, now, since this
                            // can easily "recurse".  Ie, after seeking
                            // forward at a certain char position, we may
                            // find another surrogate in our [new] suffix
                            // and must then do another seek (recurse)
                        }
                        else
                        {
                            upTo++;
                        }
                    }
                    else
                    {
                        upTo++;
                    }
                }
            }
Beispiel #20
0
        public virtual void TestCommitOnCloseForceMerge()
        {
            Directory dir = NewDirectory();

            // Must disable throwing exc on double-write: this
            // test uses IW.rollback which easily results in
            // writing to same file more than once
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
            }
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(10)));

            for (int j = 0; j < 17; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND));
            writer.ForceMerge(1);

            // Open a reader before closing (commiting) the writer:
            DirectoryReader reader = DirectoryReader.Open(dir);

            // Reader should see index as multi-seg at this
            // point:
            Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment");
            reader.Dispose();

            // Abort the writer:
            writer.Rollback();
            TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

            // Open a reader after aborting writer:
            reader = DirectoryReader.Open(dir);

            // Reader should still see index as multi-segment
            Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment");
            reader.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: do real full merge");
            }
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND));
            writer.ForceMerge(1);
            writer.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: writer closed");
            }
            TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

            // Open a reader after aborting writer:
            reader = DirectoryReader.Open(dir);

            // Reader should see index as one segment
            Assert.AreEqual(1, reader.Leaves.Count, "Reader incorrectly sees more than one segment");
            reader.Dispose();
            dir.Dispose();
        }
Beispiel #21
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet <string>             terms        = new HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]         termsArray = new BytesRef[terms.Count];
            HashSet <BytesRef> termsSet   = new HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet <string>     acceptTerms       = new HashSet <string>();
                SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]         acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet <BytesRef> acceptTermsSet   = new HashSet <BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Beispiel #22
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception /*t*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetIterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetIterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Beispiel #23
0
        private void TestRandomSeeks(IndexReader r, params string[] validTermStrings)
        {
            BytesRef[] validTerms = new BytesRef[validTermStrings.Length];
            for (int termIDX = 0; termIDX < validTermStrings.Length; termIDX++)
            {
                validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]);
            }
            Array.Sort(validTerms);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + validTerms.Length + " terms:");
                foreach (BytesRef t in validTerms)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " " + t);
                }
            }
            TermsEnum te = MultiFields.GetTerms(r, FIELD).GetIterator(null);

            int END_LOC = -validTerms.Length - 1;

            IList <TermAndState> termStates = new List <TermAndState>();

            for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++)
            {
                BytesRef  t;
                int       loc;
                TermState termState;
                if (Random.Next(6) == 4)
                {
                    // pick term that doens't exist:
                    t         = GetNonExistTerm(validTerms);
                    termState = null;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: invalid term=" + t.Utf8ToString());
                    }
                    loc = Array.BinarySearch(validTerms, t);
                }
                else if (termStates.Count != 0 && Random.Next(4) == 1)
                {
                    TermAndState ts = termStates[Random.Next(termStates.Count)];
                    t   = ts.Term;
                    loc = Array.BinarySearch(validTerms, t);
                    Assert.IsTrue(loc >= 0);
                    termState = ts.State;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: valid termState term=" + t.Utf8ToString());
                    }
                }
                else
                {
                    // pick valid term
                    loc       = Random.Next(validTerms.Length);
                    t         = BytesRef.DeepCopyOf(validTerms[loc]);
                    termState = null;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: valid term=" + t.Utf8ToString());
                    }
                }

                // seekCeil or seekExact:
                bool doSeekExact = Random.NextBoolean();
                if (termState != null)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekExact termState");
                    }
                    te.SeekExact(t, termState);
                }
                else if (doSeekExact)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekExact");
                    }
                    Assert.AreEqual(loc >= 0, te.SeekExact(t));
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekCeil");
                    }

                    TermsEnum.SeekStatus result = te.SeekCeil(t);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  got " + result);
                    }

                    if (loc >= 0)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, result);
                    }
                    else if (loc == END_LOC)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.END, result);
                    }
                    else
                    {
                        Debug.Assert(loc >= -validTerms.Length);
                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, result);
                    }
                }

                if (loc >= 0)
                {
                    Assert.AreEqual(t, te.Term);
                }
                else if (doSeekExact)
                {
                    // TermsEnum is unpositioned if seekExact returns false
                    continue;
                }
                else if (loc == END_LOC)
                {
                    continue;
                }
                else
                {
                    loc = -loc - 1;
                    Assert.AreEqual(validTerms[loc], te.Term);
                }

                // Do a bunch of next's after the seek
                int numNext = Random.Next(validTerms.Length);

                for (int nextCount = 0; nextCount < numNext; nextCount++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: next loc=" + loc + " of " + validTerms.Length);
                    }
                    BytesRef t2 = te.Next();
                    loc++;
                    if (loc == validTerms.Length)
                    {
                        Assert.IsNull(t2);
                        break;
                    }
                    else
                    {
                        Assert.AreEqual(validTerms[loc], t2);
                        if (Random.Next(40) == 17 && termStates.Count < 100)
                        {
                            termStates.Add(new TermAndState(validTerms[loc], te.GetTermState()));
                        }
                    }
                }
            }
        }
Beispiel #24
0
            public virtual void IndexDoc()
            {
                Document d = new Document();

                FieldType customType1 = new FieldType(TextField.TYPE_STORED);

                customType1.IsTokenized = false;
                customType1.OmitNorms   = true;

                List <Field> fields   = new List <Field>();
                string       idString = IdString;
                Field        idField  = NewField("id", idString, customType1);

                fields.Add(idField);

                int nFields = NextInt(MaxFields);

                for (int i = 0; i < nFields; i++)
                {
                    FieldType customType = new FieldType();
                    switch (NextInt(4))
                    {
                    case 0:
                        break;

                    case 1:
                        customType.StoreTermVectors = true;
                        break;

                    case 2:
                        customType.StoreTermVectors         = true;
                        customType.StoreTermVectorPositions = true;
                        break;

                    case 3:
                        customType.StoreTermVectors       = true;
                        customType.StoreTermVectorOffsets = true;
                        break;
                    }

                    switch (NextInt(4))
                    {
                    case 0:
                        customType.IsStored  = true;
                        customType.OmitNorms = true;
                        customType.IsIndexed = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(1), customType));
                        break;

                    case 1:
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 2:
                        customType.IsStored                 = true;
                        customType.StoreTermVectors         = false;
                        customType.StoreTermVectorOffsets   = false;
                        customType.StoreTermVectorPositions = false;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 3:
                        customType.IsStored    = true;
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(BigFieldSize), customType));
                        break;
                    }
                }

                if (SameFieldOrder)
                {
                    fields.Sort(fieldNameComparer);
                }
                else
                {
                    // random placement of id field also
                    fields.Swap(NextInt(fields.Count), 0);
                }

                for (int i = 0; i < fields.Count; i++)
                {
                    d.Add(fields[i]);
                }
                if (VERBOSE)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString);
                }
                w.UpdateDocument(new Term("id", idString), d);
                //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
                Docs[idString] = d;
            }
        public virtual void Test()
        {
            MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors"));

            dir.PreventDoubleWrite = false;
            double rate = Random.NextDouble() * 0.01;

            //System.out.println("rate=" + rate);
            dir.RandomIOExceptionRateOnOpen = rate;
            int                  iters       = AtLeast(20);
            LineFileDocs         docs        = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            IndexReader          r           = null;
            DirectoryReader      r2          = null;
            bool                 any         = false;
            MockDirectoryWrapper dirCopy     = null;
            int                  lastNumDocs = 0;

            for (int iter = 0; iter < iters; iter++)
            {
                IndexWriter w = null;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                try
                {
                    MockAnalyzer analyzer = new MockAnalyzer(Random);
                    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

                    if (VERBOSE)
                    {
                        // Do this ourselves instead of relying on LTC so
                        // we see incrementing messageID:
                        iwc.SetInfoStream(new TextWriterInfoStream(Console.Out));
                    }
                    var ms = iwc.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }
                    w = new IndexWriter(dir, iwc);
                    if (r != null && Random.Next(5) == 3)
                    {
                        if (Random.NextBoolean())
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes IR[]");
                            }
                            w.AddIndexes(new IndexReader[] { r });
                        }
                        else
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes Directory[]");
                            }
                            w.AddIndexes(new Directory[] { dirCopy });
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: addDocument");
                        }
                        w.AddDocument(docs.NextDoc());
                    }
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    w.Dispose();
                    w = null;

                    // NOTE: this is O(N^2)!  Only enable for temporary debugging:
                    //dir.setRandomIOExceptionRateOnOpen(0.0);
                    //TestUtil.CheckIndex(dir);
                    //dir.setRandomIOExceptionRateOnOpen(rate);

                    // Verify numDocs only increases, to catch IndexWriter
                    // accidentally deleting the index:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    Assert.IsTrue(DirectoryReader.IndexExists(dir));
                    if (r2 == null)
                    {
                        r2 = DirectoryReader.Open(dir);
                    }
                    else
                    {
                        DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2);
                        if (r3 != null)
                        {
                            r2.Dispose();
                            r2 = r3;
                        }
                    }
                    Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs);
                    lastNumDocs = r2.NumDocs;
                    //System.out.println("numDocs=" + lastNumDocs);
                    dir.RandomIOExceptionRateOnOpen = rate;

                    any = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": success");
                    }
                }
                catch (IOException ioe)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": exception");
                        Console.WriteLine(ioe.ToString());
                        Console.Write(ioe.StackTrace);
                    }
                    if (w != null)
                    {
                        // NOTE: leave random IO exceptions enabled here,
                        // to verify that rollback does not try to write
                        // anything:
                        w.Rollback();
                    }
                }

                if (any && r == null && Random.NextBoolean())
                {
                    // Make a copy of a non-empty index so we can use
                    // it to addIndexes later:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    r       = DirectoryReader.Open(dir);
                    dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
                    HashSet <string> files = new HashSet <string>();
                    foreach (string file in dir.ListAll())
                    {
                        dir.Copy(dirCopy, file, file, IOContext.DEFAULT);
                        files.Add(file);
                    }
                    dirCopy.Sync(files);
                    // Have IW kiss the dir so we remove any leftover
                    // files ... we can easily have leftover files at
                    // the time we take a copy because we are holding
                    // open a reader:
                    (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose();
                    dirCopy.RandomIOExceptionRate   = rate;
                    dir.RandomIOExceptionRateOnOpen = rate;
                }
            }

            if (r2 != null)
            {
                r2.Dispose();
            }
            if (r != null)
            {
                r.Dispose();
                dirCopy.Dispose();
            }
            dir.Dispose();
        }
Beispiel #26
0
        private PreviousSearchState AssertSame(IndexSearcher mockSearcher, NodeState.ShardIndexSearcher shardSearcher, Query q, Sort sort, PreviousSearchState state)
        {
            int numHits = TestUtil.NextInt(Random(), 1, 100);

            if (state != null && state.SearchAfterLocal == null)
            {
                // In addition to what we last searched:
                numHits += state.NumHitsPaged;
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: query=" + q + " sort=" + sort + " numHits=" + numHits);
                if (state != null)
                {
                    Console.WriteLine("  prev: searchAfterLocal=" + state.SearchAfterLocal + " searchAfterShard=" + state.SearchAfterShard + " numHitsPaged=" + state.NumHitsPaged);
                }
            }

            // Single (mock local) searcher:
            TopDocs hits;

            if (sort == null)
            {
                if (state != null && state.SearchAfterLocal != null)
                {
                    hits = mockSearcher.SearchAfter(state.SearchAfterLocal, q, numHits);
                }
                else
                {
                    hits = mockSearcher.Search(q, numHits);
                }
            }
            else
            {
                hits = mockSearcher.Search(q, numHits, sort);
            }

            // Shard searcher
            TopDocs shardHits;

            if (sort == null)
            {
                if (state != null && state.SearchAfterShard != null)
                {
                    shardHits = shardSearcher.SearchAfter(state.SearchAfterShard, q, numHits);
                }
                else
                {
                    shardHits = shardSearcher.Search(q, numHits);
                }
            }
            else
            {
                shardHits = shardSearcher.Search(q, numHits, sort);
            }

            int numNodes = shardSearcher.NodeVersions.Length;

            int[] @base = new int[numNodes];
            IList <IndexReaderContext> subs = mockSearcher.TopReaderContext.Children;

            Assert.AreEqual(numNodes, subs.Count);

            for (int nodeID = 0; nodeID < numNodes; nodeID++)
            {
                @base[nodeID] = subs[nodeID].DocBaseInParent;
            }

            if (VERBOSE)
            {
                /*
                 * for(int shardID=0;shardID<shardSearchers.Length;shardID++) {
                 * System.out.println("  shard=" + shardID + " maxDoc=" + shardSearchers[shardID].searcher.getIndexReader().MaxDoc);
                 * }
                 */
                Console.WriteLine("  single searcher: " + hits.TotalHits + " totalHits maxScore=" + hits.MaxScore);
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    ScoreDoc sd = hits.ScoreDocs[i];
                    Console.WriteLine("    doc=" + sd.Doc + " score=" + sd.Score);
                }
                Console.WriteLine("  shard searcher: " + shardHits.TotalHits + " totalHits maxScore=" + shardHits.MaxScore);
                for (int i = 0; i < shardHits.ScoreDocs.Length; i++)
                {
                    ScoreDoc sd = shardHits.ScoreDocs[i];
                    Console.WriteLine("    doc=" + sd.Doc + " (rebased: " + (sd.Doc + @base[sd.ShardIndex]) + ") score=" + sd.Score + " shard=" + sd.ShardIndex);
                }
            }

            int numHitsPaged;

            if (state != null && state.SearchAfterLocal != null)
            {
                numHitsPaged = hits.ScoreDocs.Length;
                if (state != null)
                {
                    numHitsPaged += state.NumHitsPaged;
                }
            }
            else
            {
                numHitsPaged = hits.ScoreDocs.Length;
            }

            bool moreHits;

            ScoreDoc bottomHit;
            ScoreDoc bottomHitShards;

            if (numHitsPaged < hits.TotalHits)
            {
                // More hits to page through
                moreHits = true;
                if (sort == null)
                {
                    bottomHit = hits.ScoreDocs[hits.ScoreDocs.Length - 1];
                    ScoreDoc sd = shardHits.ScoreDocs[shardHits.ScoreDocs.Length - 1];
                    // Must copy because below we rebase:
                    bottomHitShards = new ScoreDoc(sd.Doc, sd.Score, sd.ShardIndex);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  save bottomHit=" + bottomHit);
                    }
                }
                else
                {
                    bottomHit       = null;
                    bottomHitShards = null;
                }
            }
            else
            {
                Assert.AreEqual(hits.TotalHits, numHitsPaged);
                bottomHit       = null;
                bottomHitShards = null;
                moreHits        = false;
            }

            // Must rebase so Assert.AreEqual passes:
            for (int hitID = 0; hitID < shardHits.ScoreDocs.Length; hitID++)
            {
                ScoreDoc sd = shardHits.ScoreDocs[hitID];
                sd.Doc += @base[sd.ShardIndex];
            }

            TestUtil.AssertEquals(hits, shardHits);

            if (moreHits)
            {
                // Return a continuation:
                return(new PreviousSearchState(q, sort, bottomHit, bottomHitShards, shardSearcher.NodeVersions, numHitsPaged));
            }
            else
            {
                return(null);
            }
        }
Beispiel #27
0
        // Runs test, with multiple threads, using the specific
        // failure to trigger an IOException
        public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, Failure failure)
        {
            int NUM_THREADS = 3;

            for (int iter = 0; iter < 2; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                MockDirectoryWrapper dir = NewMockDirectory();
                var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                             .SetMaxBufferedDocs(2)
                             .SetMergeScheduler(newScheduler())
                             .SetMergePolicy(NewLogMergePolicy(4));
                IndexWriter writer    = new IndexWriter(dir, config);
                var         scheduler = config.mergeScheduler as IConcurrentMergeScheduler;
                if (scheduler != null)
                {
                    scheduler.SetSuppressExceptions();
                }

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i] = new IndexerThread(writer, true, NewField);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                Thread.Sleep(10);

                dir.FailOn(failure);
                failure.SetDoFail();

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                    Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable");
                }

                bool success = false;
                try
                {
                    writer.Dispose(false);
                    success = true;
                }
                catch (IOException)
                {
                    failure.ClearDoFail();
                    writer.Dispose(false);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: success=" + success);
                }

                if (success)
                {
                    IndexReader reader  = DirectoryReader.Open(dir);
                    IBits       delDocs = MultiFields.GetLiveDocs(reader);
                    for (int j = 0; j < reader.MaxDoc; j++)
                    {
                        if (delDocs == null || !delDocs.Get(j))
                        {
                            reader.Document(j);
                            reader.GetTermVectors(j);
                        }
                    }
                    reader.Dispose();
                }

                dir.Dispose();
            }
        }
Beispiel #28
0
        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt(Random(), 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            List <PreviousSearchState> priorSearches = new List <PreviousSearchState>();
            List <BytesRef>            terms         = null;

            while (Time.NanoTime() < endTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random().Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = Nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.NodeVersions[nodeID];
                            IndexSearcher sub        = Nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub == null)
                            {
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms == null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetIterator(null);
                            terms = new List <BytesRef>();
                            while (termsEnum.Next() != null)
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                            }
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                terms = null;
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            if (Random().NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random().Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random().NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                int what = Random().Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random().NextBoolean()) });
                                }
                                else
                                {
                                    sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random().NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (VERBOSE)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    Nodes[myNodeID].Release(localShardSearcher);
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        Collections.Shuffle(priorSearches);
                        priorSearches.SubList(100, priorSearches.Count).Clear();
                    }
                }
            }

            Finish();
        }
Beispiel #29
0
            public override void Run()
            {
                Document  doc        = new Document();
                FieldType customType = new FieldType(TextField.TYPE_STORED);

                customType.StoreTermVectors         = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets   = true;

                doc.Add(NewField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
                doc.Add(new NumericDocValuesField("dv", 5));

                int  idUpto    = 0;
                int  fullCount = 0;
                long stopTime  = Environment.TickCount + TimeToRunInMilliseconds; // LUCENENET specific: added the ability to change how much time to alot

                do
                {
                    try
                    {
                        Writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc);
                        AddCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: expected exc:");
                            Console.WriteLine(ioe.StackTrace);
                        }
                        //System.out.println(Thread.currentThread().getName() + ": hit exc");
                        //ioConsole.WriteLine(e.StackTrace);
                        if (ioe.Message.StartsWith("fake disk full at", StringComparison.Ordinal) || ioe.Message.Equals("now failing on purpose", StringComparison.Ordinal))
                        {
                            DiskFull = true;
//#if !NETSTANDARD1_6
//                            try
//                            {
//#endif
                            Thread.Sleep(1);
//#if !NETSTANDARD1_6
//                            }
//                            catch (ThreadInterruptedException ie) // LUCENENET NOTE: Senseless to catch and rethrow the same exception type
//                            {
//                                throw new ThreadInterruptedException(ie.toString(), ie);
//                            }
//#endif
                            if (fullCount++ >= 5)
                            {
                                break;
                            }
                        }
                        else
                        {
                            if (NoErrors)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected IOException:");
                                Console.WriteLine(ioe.StackTrace);
                                Error = ioe;
                            }
                            break;
                        }
                    }
                    catch (Exception t)
                    {
                        //Console.WriteLine(t.StackTrace);
                        if (NoErrors)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected Throwable:");
                            Console.WriteLine(t.StackTrace);
                            Error = t;
                        }
                        break;
                    }
                } while (Environment.TickCount < stopTime);
            }
Beispiel #30
0
            public override BytesRef Next()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.next()");
                }
                if (skipNext)
                {
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  skipNext=true");
                    }
                    skipNext = false;
                    if (termEnum.Term() == null)
                    {
                        return(null);
                        // PreFlex codec interns field names:
                    }
                    else if (termEnum.Term().Field != internedFieldName)
                    {
                        return(null);
                    }
                    else
                    {
                        return(current = termEnum.Term().Bytes);
                    }
                }

                // TODO: can we use STE's prevBuffer here?
                prevTerm.CopyBytes(termEnum.Term().Bytes);

                if (termEnum.Next() && termEnum.Term().Field == internedFieldName)
                {
                    newSuffixStart = termEnum.newSuffixStart;
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  newSuffixStart=" + newSuffixStart);
                    }
                    SurrogateDance();
                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        current = null;
                    }
                    else
                    {
                        current = t.Bytes;
                    }
                    return(current);
                }
                else
                {
                    // this field is exhausted, but we have to give
                    // surrogateDance a chance to seek back:
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  force cont");
                    }
                    //newSuffixStart = prevTerm.length;
                    newSuffixStart = 0;
                    SurrogateDance();

                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        return(null);
                    }
                    else
                    {
                        current = t.Bytes;
                        return(current);
                    }
                }
            }