Esempio n. 1
0
        /// <summary>
        /// Randomly decides whether to merge and, if so, proposes a single merge over a
        /// random selection of the segments that are not already being merged.
        /// </summary>
        /// <param name="mergeTrigger">Not consulted by this implementation.</param>
        /// <param name="segmentInfos">The segments to pick merge candidates from.</param>
        /// <returns>
        /// A specification containing one merge, or <c>null</c> when there are fewer than
        /// two candidate segments or the random draw decides against merging.
        /// </returns>
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
        {
            MergeSpecification mergeSpec = null;
            //System.out.println("MRMP: findMerges sis=" + segmentInfos);

            // Only segments that are not part of an in-flight merge are candidates.
            IList<SegmentCommitInfo> segments = new List<SegmentCommitInfo>();
            ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;

            foreach (SegmentCommitInfo sipc in segmentInfos.Segments)
            {
                if (!merging.Contains(sipc))
                {
                    segments.Add(sipc);
                }
            }

            // The candidate count is what matters; the previous code first stored
            // segmentInfos.Size() here and then immediately overwrote it.
            int numSegments = segments.Count;

            // Always merge once more than 30 candidates pile up, otherwise merge on a 1-in-5 draw.
            if (numSegments > 1 && (numSegments > 30 || Random.Next(5) == 3))
            {
                // Shuffle hands back the shuffled list, so the result must be captured.
                segments = CollectionsHelper.Shuffle(segments);

                // TODO: sometimes make more than 1 merge?
                mergeSpec = new MergeSpecification();
                int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
                mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
            }

            return mergeSpec;
        }
Esempio n. 2
0
        /// <summary>
        /// Verifies that two filters built from the same terms are equal and share a hash code
        /// regardless of term order and duplicates, and that a filter built from a strict
        /// subset of the terms is not equal to them.
        /// </summary>
        public void TestHashCodeAndEquals()
        {
            int num = AtLeast(100);
            bool singleField = Random().NextBoolean();
            IList<Term> terms = new List<Term>();
            var uniqueTerms = new HashSet<Term>();

            for (int i = 0; i < num; i++)
            {
                string field = "field" + (singleField ? "1" : Random().Next(100).ToString());
                string @string = TestUtil.RandomRealisticUnicodeString(Random());
                terms.Add(new Term(field, @string));
                uniqueTerms.Add(new Term(field, @string));
                TermsFilter left = TermsFilter(singleField && Random().NextBoolean(), uniqueTerms);

                // CollectionsHelper.Shuffle hands the shuffled list back as its return value
                // (the other call sites all capture it). The result was previously discarded,
                // so "right" was never actually built from a reordered list. A separate local
                // is used so that "terms" itself stays an appendable list for the next iteration.
                IList<Term> shuffledTerms = CollectionsHelper.Shuffle(terms);
                TermsFilter right = TermsFilter(singleField && Random().NextBoolean(), shuffledTerms);
                assertEquals(right, left);
                assertEquals(right.GetHashCode(), left.GetHashCode());
                if (uniqueTerms.Count > 1)
                {
                    // Dropping one unique term must break equality with both filters.
                    IList<Term> asList = new List<Term>(uniqueTerms);
                    asList.RemoveAt(0);
                    TermsFilter notEqual = TermsFilter(singleField && Random().NextBoolean(), asList);
                    assertFalse(left.Equals(notEqual));
                    assertFalse(right.Equals(notEqual));
                }
            }
        }
        /// <summary>
        /// Walks every term of every field in <paramref name="reader"/> and keeps a bounded
        /// selection of them, returned in shuffled order.
        /// </summary>
        /// <param name="random">Source of the slot chosen when the selection is already full.</param>
        /// <param name="reader">Reader whose fields and terms are enumerated.</param>
        /// <param name="size">Number of terms after which new terms overwrite existing ones.</param>
        /// <returns>The shuffled selection of terms.</returns>
        private static IList<Term> Sample(Random random, IndexReader reader, int size)
        {
            IList<Term> picked = new List<Term>();
            Fields allFields = MultiFields.GetFields(reader);

            foreach (string fieldName in allFields)
            {
                Terms fieldTerms = allFields.Terms(fieldName);
                Assert.IsNotNull(fieldTerms);

                for (TermsEnum cursor = fieldTerms.Iterator(null); cursor.Next() != null;)
                {
                    if (picked.Count < size)
                    {
                        // Still filling up: just append.
                        picked.Add(new Term(fieldName, cursor.Term()));
                    }
                    else
                    {
                        // Full: the new term replaces a randomly chosen earlier pick.
                        int slot = random.Next(size);
                        picked[slot] = new Term(fieldName, cursor.Term());
                    }
                }
            }

            return CollectionsHelper.Shuffle(picked);
        }
        /// <summary>
        /// Picks a random set of facet categories: one to three from <c>CATEGORIES_A</c>,
        /// one or two from <c>CATEGORIES_B</c>, plus one each from <c>CATEGORIES_C</c> and
        /// <c>CATEGORIES_D</c>.
        /// </summary>
        /// <param name="random">Generator used for every random draw made by this method.</param>
        /// <returns>The selected categories.</returns>
        private static IList<FacetField> RandomCategories(Random random)
        {
            // add random categories from the two dimensions, ensuring that the same
            // category is not added twice.
            int numFacetsA = random.Next(3) + 1; // 1-3
            int numFacetsB = random.Next(2) + 1; // 1-2
            List<FacetField> categories_a = new List<FacetField>();

            categories_a.AddRange(Arrays.AsList(CATEGORIES_A));
            List<FacetField> categories_b = new List<FacetField>();

            categories_b.AddRange(Arrays.AsList(CATEGORIES_B));
            // Taking a prefix of a shuffled list is what guarantees no duplicates.
            categories_a = CollectionsHelper.Shuffle(categories_a).ToList();
            categories_b = CollectionsHelper.Shuffle(categories_b).ToList();

            List<FacetField> categories = new List<FacetField>();

            categories.AddRange(categories_a.SubList(0, numFacetsA));
            categories.AddRange(categories_b.SubList(0, numFacetsB));

            // add the NO_PARENT categories
            // Draw from the supplied generator like the counts above do; these two picks
            // previously bypassed the parameter and used the ambient Random().
            categories.Add(CATEGORIES_C[random.Next(NUM_CHILDREN_CP_C)]);
            categories.Add(CATEGORIES_D[random.Next(NUM_CHILDREN_CP_D)]);

            return categories;
        }
Esempio n. 5
0
        /// <summary>
        /// For a number of random regular expressions, seeks to every indexed term the
        /// corresponding automaton accepts (visiting the terms in shuffled order) and checks
        /// that both exact and ceiling seeks land on that term.
        /// </summary>
        public virtual void TestSeeking()
        {
            for (int iteration = 0; iteration < NumIterations; iteration++)
            {
                string regexp = AutomatonTestUtil.RandomRegexp(Random());
                Automaton acceptor = new RegExp(regexp, RegExp.NONE).ToAutomaton();
                TermsEnum termsEnum = MultiFields.GetTerms(Reader, "field").Iterator(null);
                IList<BytesRef> shuffledTerms = CollectionsHelper.Shuffle(new List<BytesRef>(Terms));

                foreach (BytesRef candidate in shuffledTerms)
                {
                    // Terms the automaton rejects are not exercised at all.
                    if (!BasicOperations.Run(acceptor, candidate.Utf8ToString()))
                    {
                        continue;
                    }

                    if (Random().NextBoolean())
                    {
                        // seek exact
                        Assert.IsTrue(termsEnum.SeekExact(candidate));
                    }
                    else
                    {
                        // seek ceil: must report FOUND and be positioned on the same term
                        Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(candidate));
                        Assert.AreEqual(candidate, termsEnum.Term());
                    }
                }
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Waits for the starting gun, then repeats 20 rounds in which every recorded answer is
 /// re-queried (in shuffled order) and compared against the expected hits.
 /// Any failure is rethrown wrapped in a new <see cref="Exception"/>.
 /// </summary>
 public override void Run()
 {
     try
     {
         StartingGun.Wait();

         for (int round = 0; round < 20; round++)
         {
             var entries = CollectionsHelper.Shuffle(new List<KeyValuePair<BytesRef, TopDocs>>(Answers.EntrySet()));

             foreach (var entry in entries)
             {
                 TopDocs actual = s.Search(new TermQuery(new Term("body", entry.Key)), 100);
                 TopDocs expected = entry.Value;
                 Assert.AreEqual(expected.TotalHits, actual.TotalHits);

                 var expectedHits = expected.ScoreDocs;
                 var actualHits = actual.ScoreDocs;
                 Assert.AreEqual(expectedHits.Length, actualHits.Length, "query=" + entry.Key.Utf8ToString());

                 for (int rank = 0; rank < expectedHits.Length; rank++)
                 {
                     Assert.AreEqual(expectedHits[rank].Doc, actualHits[rank].Doc);
                     // Floats really should be identical:
                     Assert.IsTrue(expectedHits[rank].Score == actualHits[rank].Score);
                 }
             }
         }
     }
     catch (Exception e)
     {
         throw new Exception(e.Message, e);
     }
 }
Esempio n. 7
0
 /// <summary>
 /// Initializes the provider from <paramref name="random"/>: a per-field seed, a coord
 /// type in the range 0-2, a query-norm flag, and a shuffled copy of <c>AllSims</c>.
 /// </summary>
 /// <param name="random">Generator for the seed, coord type and query-norm flag.</param>
 public RandomSimilarityProvider(Random random)
 {
     PerFieldSeed = random.Next();
     CoordType = random.Next(3);
     ShouldQueryNorm = random.NextBoolean();

     // Copy first so AllSims itself is never handed to the shuffle.
     // NOTE: the shuffle call does not receive "random"; the Java original was
     //Collections.shuffle(KnownSims, random);
     KnownSims = CollectionsHelper.Shuffle(new List<Similarity>(AllSims));
 }
Esempio n. 8
0
        /// <summary>
        /// Loads terms and frequencies from Wikipedia (cached), then stores one shuffled
        /// copy as the dictionary input and a re-shuffled one as the benchmark input.
        /// </summary>
        public override void SetUp()
        {
            Debug.Assert(false, "disable assertions before running benchmarks!");

            IList<Input> dictionaryOrder = CollectionsHelper.Shuffle(ReadTop50KWiki());
            dictionaryInput = dictionaryOrder.ToArray();

            // Shuffle again so the benchmark input gets its own ordering.
            benchmarkInput = CollectionsHelper.Shuffle(dictionaryOrder);
        }
Esempio n. 9
0
        /// <summary>
        /// Adds a random, non-empty selection of <paramref name="values"/> to
        /// <paramref name="doc"/>, each as both a "field" string field and a "dv"
        /// sorted-set doc-values field.
        /// </summary>
        /// <param name="doc">Document that receives the fields.</param>
        /// <param name="values">Candidate values; a shuffled prefix of them is added.</param>
        private static void AddSome(Document doc, string[] values)
        {
            IList<string> shuffled = CollectionsHelper.Shuffle(Arrays.AsList(values));
            int count = TestUtil.NextInt(Random(), 1, shuffled.Count);

            for (int idx = 0; idx < count; idx++)
            {
                string value = shuffled[idx];
                doc.Add(new StringField("field", value, Field.Store.NO));
                doc.Add(new SortedSetDocValuesField("dv", new BytesRef(value)));
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Indexes random single-term documents, then repeatedly checks that a terms filter
        /// over a random subset of the terms matches the same documents as a constant-score
        /// boolean query made of SHOULD clauses over that same subset.
        /// </summary>
        public void TestRandom()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int num = AtLeast(100);
            bool singleField = Random().NextBoolean();
            IList<Term> terms = new List<Term>();

            for (int i = 0; i < num; i++)
            {
                string field = "field" + (singleField ? "1" : Random().Next(100).ToString());
                string @string = TestUtil.RandomRealisticUnicodeString(Random());
                terms.Add(new Term(field, @string));
                Document doc = new Document();
                doc.Add(NewStringField(field, @string, Field.Store.YES));
                w.AddDocument(doc);
            }
            IndexReader reader = w.Reader;

            w.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            int numQueries = AtLeast(10);

            for (int i = 0; i < numQueries; i++)
            {
                // CollectionsHelper.Shuffle hands the shuffled list back as its return value
                // (the other call sites all capture it). The result was previously discarded,
                // so every iteration queried a prefix of the same, unshuffled term order.
                terms = CollectionsHelper.Shuffle(terms);
                int numTerms = 1 + Random().Next(Math.Min(BooleanQuery.MaxClauseCount, terms.Count));
                BooleanQuery bq = new BooleanQuery();
                for (int j = 0; j < numTerms; j++)
                {
                    bq.Add(new BooleanClause(new TermQuery(terms[j]), BooleanClause.Occur.SHOULD));
                }
                TopDocs queryResult = searcher.Search(new ConstantScoreQuery(bq), reader.MaxDoc);

                // The filter is built from the same leading numTerms terms as the query.
                MatchAllDocsQuery matchAll = new MatchAllDocsQuery();
                TermsFilter filter = TermsFilter(singleField, terms.SubList(0, numTerms));
                TopDocs filterResult = searcher.Search(matchAll, filter, reader.MaxDoc);
                assertEquals(filterResult.TotalHits, queryResult.TotalHits);
                ScoreDoc[] scoreDocs = filterResult.ScoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    assertEquals(scoreDocs[j].Doc, queryResult.ScoreDocs[j].Doc);
                }
            }

            reader.Dispose();
            dir.Dispose();
        }
Esempio n. 11
0
 /// <summary>
 /// Unions the given automata (in shuffled order), determinizes the result, and asserts
 /// that it is finite and accepts every term, both as a string and as UTF-8 bytes.
 /// </summary>
 /// <param name="a">Automata to union.</param>
 /// <param name="terms">Strings the union is expected to accept.</param>
 public void AssertLexicon(List<Automaton> a, List<string> terms)
 {
     var lexicon = BasicOperations.Union(CollectionsHelper.Shuffle(a));
     lexicon.Determinize();
     Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

     // First pass: character-level acceptance.
     foreach (string term in terms)
     {
         Assert.IsTrue(BasicOperations.Run(lexicon, term));
     }

     // Second pass: byte-level acceptance over the UTF-8 encoding of each term.
     var byteLexicon = new ByteRunAutomaton(lexicon);
     foreach (string term in terms)
     {
         sbyte[] utf8 = term.GetBytes(Encoding.UTF8);
         Assert.IsTrue(byteLexicon.Run(utf8, 0, utf8.Length));
     }
 }
Esempio n. 12
0
        /// <summary>
        /// Seeds the codec from <paramref name="random"/>, registers the candidate postings
        /// and doc-values formats, shuffles both lists and keeps at most four of each.
        /// </summary>
        /// <param name="random">Generator for the per-field seed and the block-size parameters.</param>
        /// <param name="avoidCodecs">Forwarded unchanged to <c>Add</c> and <c>AddDocValues</c>.</param>
        public RandomCodec(Random random, ISet<string> avoidCodecs)
        {
            this.PerFieldSeed = random.Next();

            // TODO: make it possible to specify min/max iterms per
            // block via CL:
            int minItemsPerBlock = TestUtil.NextInt(random, 2, 100);
            int maxItemsPerBlock = 2 * (Math.Max(2, minItemsPerBlock - 1)) + random.Next(100);
            // Only referenced by the disabled formats listed below, but the draw is still made.
            int lowFreqCutoff = TestUtil.NextInt(random, 2, 100);

            /*
             * Postings formats that are currently disabled:
             * new FSTPostingsFormat(),
             * new FSTOrdPostingsFormat(),
             * new FSTPulsing41PostingsFormat(1 + random.Next(20)), new FSTOrdPulsing41PostingsFormat(1 + random.Next(20)),
             * new DirectPostingsFormat(LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : maxItemsPerBlock), LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : lowFreqCutoff)),
             * new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock),
             * new TestBloomFilteredLucene41Postings(), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 2000)),
             * new MockVariableIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 127)), new MockRandomPostingsFormat(random),
             * new NestedPulsingPostingsFormat(), new Lucene41WithOrds(), new SimpleTextPostingsFormat(),
             * new MemoryPostingsFormat(true, random.nextFloat()), new MemoryPostingsFormat(false, random.nextFloat())
             */
            Add(avoidCodecs,
                new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
                new AssertingPostingsFormat());

            // add pulsing again with (usually) different parameters
            //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed
            //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
            //with such "wrapper" classes?

            // Disabled doc-values formats: new DiskDocValuesFormat(), new MemoryDocValuesFormat(), new SimpleTextDocValuesFormat()
            AddDocValues(avoidCodecs, new Lucene45DocValuesFormat(), new AssertingDocValuesFormat());

            Formats = CollectionsHelper.Shuffle(Formats);
            DvFormats = CollectionsHelper.Shuffle(DvFormats);

            // Avoid too many open files:
            const int maxKept = 4;
            if (Formats.Count > maxKept)
            {
                Formats = Formats.SubList(0, maxKept);
            }
            if (DvFormats.Count > maxKept)
            {
                DvFormats = DvFormats.SubList(0, maxKept);
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Creates an index for sorting: documents with ids 0, 10, 20, ... are added in
        /// shuffled order, committed, and then roughly a fifth of them are deleted at random.
        /// </summary>
        /// <param name="dir">Directory the index is written to.</param>
        /// <param name="numDocs">Number of documents to add.</param>
        /// <param name="random">Generator for the analyzer, the writer and the delete decisions.</param>
        public void CreateIndex(Directory dir, int numDocs, Random random)
        {
            IList<int> docIds = new List<int>();
            for (int n = 0; n < numDocs; n++)
            {
                docIds.Add(n * 10);
            }

            // shuffle them for indexing
            // LUCENENET NOTE: Using LINQ, so we need to reassign the variable with the result
            docIds = CollectionsHelper.Shuffle(docIds);

            if (VERBOSE)
            {
                Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(docIds.ToArray()));
            }

            var positions = new PositionsTokenStream();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            config.SetMaxBufferedDocs(4);                                 // create some segments
            config.SetSimilarity(new NormsSimilarity(config.Similarity)); // for testing norms field

            using (var writer = new RandomIndexWriter(random, dir, config))
            {
                writer.RandomForceMerge = false;

                foreach (int docId in docIds)
                {
                    writer.AddDocument(Doc(docId, positions));
                }

                // delete some documents
                writer.Commit();
                foreach (int docId in docIds)
                {
                    // Keep the document unless the draw falls below 0.2.
                    if (random.NextDouble() >= 0.2)
                    {
                        continue;
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("delete doc_id " + docId);
                    }
                    writer.DeleteDocuments(new Term(ID_FIELD, docId.ToString()));
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Splits the segments that appear in <paramref name="segmentsToMerge"/> into randomly
        /// sized merges of at most ten segments each, after shuffling them.
        /// </summary>
        /// <param name="segmentInfos">All current segments.</param>
        /// <param name="maxSegmentCount">Not consulted by this implementation.</param>
        /// <param name="segmentsToMerge">Only segments present as keys here are eligible.</param>
        /// <returns>
        /// The merges to run, or <c>null</c> when there is no eligible segment or the only
        /// eligible segment has no deletions.
        /// </returns>
        public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
        {
            IList<SegmentCommitInfo> candidates = new List<SegmentCommitInfo>();
            foreach (SegmentCommitInfo segment in segmentInfos.Segments)
            {
                if (segmentsToMerge.ContainsKey(segment))
                {
                    candidates.Add(segment);
                }
            }

            //System.out.println("MRMP: findMerges sis=" + segmentInfos + " eligible=" + eligibleSegments);
            bool worthMerging = candidates.Count > 1
                || (candidates.Count == 1 && candidates[0].HasDeletions());
            if (!worthMerging)
            {
                return null;
            }

            var spec = new MergeSpecification();

            // Already shuffled having come out of a set but
            // shuffle again for good measure:
            candidates = CollectionsHelper.Shuffle(candidates);

            for (int start = 0; start < candidates.Count;)
            {
                int cap = Math.Min(10, candidates.Count - start);
                // With two or fewer left, take them all; otherwise take a random chunk of at least two.
                int take = cap <= 2 ? cap : TestUtil.NextInt(Random, 2, cap);
                spec.Add(new OneMerge(candidates.SubList(start, start + take)));
                start += take;
            }

            // Sanity check: every segment placed in a merge was actually requested.
            foreach (OneMerge merge in spec.Merges)
            {
                foreach (SegmentCommitInfo segment in merge.Segments)
                {
                    Debug.Assert(segmentsToMerge.ContainsKey(segment));
                }
            }

            return spec;
        }
Esempio n. 15
0
        /// <summary>
        /// Builds a bag of single-letter tokens ('a' through 'z'), each repeated a random
        /// number of times that never exceeds a randomly chosen ceiling of at most 255.
        /// The highest repetition count is appended to <c>Expected</c> so it can be checked
        /// against the norm; the tokens are then shuffled and rendered with
        /// <c>Arrays.ToString</c>. This works fine because we use lettertokenizer.
        /// </summary>
        /// <returns>The string form of the shuffled token array.</returns>
        private string AddValue()
        {
            IList<string> tokens = new List<string>();
            int ceiling = TestUtil.NextInt(Random(), 0, 255);
            int highestFreq = 0;

            for (char letter = 'a'; letter <= 'z'; letter++)
            {
                int freq = TestUtil.NextInt(Random(), 0, ceiling);
                for (int remaining = freq; remaining > 0; remaining--)
                {
                    tokens.Add(char.ToString(letter));
                }

                if (freq > highestFreq)
                {
                    highestFreq = freq;
                }
            }

            Expected.Add(highestFreq);

            IList<string> shuffledTokens = CollectionsHelper.Shuffle(tokens);
            return Arrays.ToString(shuffledTokens.ToArray());
        }
Esempio n. 16
0
        /// <summary>
        /// Shuffles the common, medium and rare terms together and, for every prefix of two
        /// or more terms and every possible minimum-should-match value, checks that the two
        /// scorer variants produced by <c>Scorer</c> iterate identically.
        /// </summary>
        public virtual void TestNextVaryingNumberOfTerms()
        {
            IList<string> pool = new List<string>();
            pool.AddRange(Arrays.AsList(CommonTerms));
            pool.AddRange(Arrays.AsList(MediumTerms));
            pool.AddRange(Arrays.AsList(RareTerms));
            pool = CollectionsHelper.Shuffle(pool);

            for (int prefixLength = 2; prefixLength <= pool.Count; prefixLength++)
            {
                string[] prefix = pool.SubList(0, prefixLength).ToArray(/*new string[0]*/);

                for (int minMatch = 1; minMatch <= prefix.Length; minMatch++)
                {
                    Scorer reference = Scorer(prefix, minMatch, true);
                    Scorer candidate = Scorer(prefix, minMatch, false);
                    AssertNext(reference, candidate);
                }
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Picks random terms from <paramref name="terms"/> (ten picks per term overall) and
        /// checks that each one is both searchable through a term query and reachable through
        /// a ceiling seek on the "field" terms enum. Failures are logged as they occur and
        /// asserted once at the end.
        /// </summary>
        /// <param name="r">Reader to search and seek against.</param>
        /// <param name="terms">Terms expected to exist in the "field" field.</param>
        private void TestSavedTerms(IndexReader r, IList<BytesRef> terms)
        {
            Console.WriteLine("TEST: run " + terms.Count + " terms on reader=" + r);
            IndexSearcher searcher = NewSearcher(r);

            terms = CollectionsHelper.Shuffle(terms);
            TermsEnum termsEnum = MultiFields.GetTerms(r, "field").Iterator(null);
            bool sawFailure = false;

            int totalPicks = 10 * terms.Count;
            for (int pick = 0; pick < totalPicks; pick++)
            {
                BytesRef term = terms[Random().Next(terms.Count)];
                Console.WriteLine("TEST: search " + term);

                long startMillis = Environment.TickCount;
                int hits = searcher.Search(new TermQuery(new Term("field", term)), 1).TotalHits;
                if (hits <= 0)
                {
                    Console.WriteLine("  FAILED: count=" + hits);
                    sawFailure = true;
                }
                long endMillis = Environment.TickCount;
                Console.WriteLine("  took " + (endMillis - startMillis) + " millis");

                TermsEnum.SeekStatus status = termsEnum.SeekCeil(term);
                if (status == TermsEnum.SeekStatus.FOUND)
                {
                    continue;
                }

                // Anything other than FOUND is a failure; report which kind.
                Console.WriteLine(status == TermsEnum.SeekStatus.END
                    ? "  FAILED: got END"
                    : "  FAILED: wrong term: got " + termsEnum.Term());
                sawFailure = true;
            }

            Assert.IsFalse(sawFailure);
        }
        /// <summary>
        /// Builds a space-separated field value from the entries of <c>Terms</c> starting at
        /// a random index, each repeated a random number of times bounded by
        /// <paramref name="maxTF"/>, with all occurrences shuffled together.
        /// </summary>
        /// <param name="maxTF">Upper bound handed to <c>TestUtil.NextInt</c> for each term's repeat count.</param>
        /// <returns>The shuffled terms, each followed by a single space.</returns>
        internal virtual string FieldValue(int maxTF)
        {
            IList<string> bag = new List<string>();

            for (int termIdx = Random().Next(Terms.Length); termIdx < Terms.Length; termIdx++)
            {
                int repeats = TestUtil.NextInt(Random(), 1, maxTF);
                for (int k = 0; k < repeats; k++)
                {
                    bag.Add(Terms[termIdx]);
                }
            }

            StringBuilder text = new StringBuilder();
            foreach (string token in CollectionsHelper.Shuffle(bag))
            {
                text.Append(token).Append(' ');
            }

            return text.ToString();
        }
Esempio n. 19
0
        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt(Random(), 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            List <PreviousSearchState> priorSearches = new List <PreviousSearchState>();
            List <BytesRef>            terms         = null;

            while (TimeHelper.NanoTime() < EndTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random().Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((TimeHelper.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = Nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.NodeVersions[nodeID];
                            IndexSearcher sub        = Nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub == null)
                            {
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms == null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").Iterator(null);
                            terms = new List <BytesRef>();
                            while (termsEnum.Next() != null)
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term()));
                            }
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                terms = null;
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            if (Random().NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random().Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random().NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                int what = Random().Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortField.Type_e.INT, Random().NextBoolean()) });
                                }
                                else
                                {
                                    sort = new Sort(new SortField[] { new SortField("title", SortField.Type_e.STRING, Random().NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (VERBOSE)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    Nodes[myNodeID].Release(localShardSearcher);
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        priorSearches = (List <PreviousSearchState>)CollectionsHelper.Shuffle(priorSearches);
                        priorSearches.SubList(100, priorSearches.Count).Clear();
                    }
                }
            }

            Finish();
        }
Esempio n. 20
0
        /// <summary>
        /// Builds a shuffled bag of postings in which the term <c>i</c> occurs <c>i</c> times,
        /// hands the bag and a shared writer to a random number of worker threads,
        /// force-merges down to one segment and asserts that each term's
        /// <c>TotalTermFreq</c> equals the integer the term spells.
        /// </summary>
        public virtual void Test()
        {
            int  termCount      = AtLeast(300);
            int  maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool simpleText     = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if (simpleText || iwc.MergePolicy is MockRandomMergePolicy)
            {
                if (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)
                {
                    // Otherwise test can take way too long (> 2 hours)
                    termCount /= 2;
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + termCount);
            }

            // Term "i" is put into the bag i times, so term "0" never appears.
            IList<string> bag = new List<string>();
            for (int value = 0; value < termCount; value++)
            {
                string text = Convert.ToString(value);
                for (int copies = value; copies > 0; copies--)
                {
                    bag.Add(text);
                }
            }
            bag = CollectionsHelper.Shuffle(bag);

            var postings = new ConcurrentQueue<string>(bag);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));
            var iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            // Start from the default text field type and randomise what gets indexed.
            Field prototype = NewTextField("field", "", Field.Store.NO);
            var fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }

            switch (Random().Next(3))
            {
                case 0:
                    // we dont actually need positions
                    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                    // but enforce term vectors when we do this so we check SOMETHING
                    fieldType.StoreTermVectors = true;
                    break;

                case 1:
                    if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
                    {
                        fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    break;

                default:
                    // else just positions
                    break;
            }

            var workers = new ThreadClass[threadCount];
            var startingGun = new CountdownEvent(1);

            for (int slot = 0; slot < workers.Length; slot++)
            {
                // Each worker gets its own Random, Document and Field instance.
                var workerRandom = new Random(Random().Next());
                var workerDoc = new Document();
                var workerField = new Field("field", "", fieldType);
                workerDoc.Add(workerField);

                ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, termCount, maxTermsPerDoc, postings, iw, startingGun, workerRandom, workerDoc, workerField);
                workers[slot] = worker;
                worker.Start();
            }

            startingGun.Signal();
            for (int slot = 0; slot < workers.Length; slot++)
            {
                workers[slot].Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            var leaf = (AtomicReader)ir.Leaves[0].Reader;
            Terms terms = leaf.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(termCount - 1, terms.Size());

            TermsEnum termsEnum = terms.Iterator(null);
            for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
            {
                int expectedFreq = Convert.ToInt32(current.Utf8ToString());
                Assert.AreEqual(expectedFreq, termsEnum.TotalTermFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }

            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Esempio n. 21
0
        /// <summary>
        /// Indexes documents that each carry a random subset of stored fields
        /// <c>f0..f(fieldCount-1)</c>, occasionally shuffling the field order and deleting
        /// an earlier document by id. Afterwards it looks up random surviving ids and
        /// checks every stored field against the in-memory copy of the document, once on
        /// the index as written and once more after <c>ForceMerge(1)</c>.
        /// </summary>
        public virtual void TestRandomStoredFields()
        {
            Directory         dir  = NewDirectory();
            Random            rand = Random();
            RandomIndexWriter w    = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
            //w.w.setNoCFSRatio(0.0);
            int docCount   = AtLeast(200);
            int fieldCount = TestUtil.NextInt(rand, 1, 5);

            IList <int?> fieldIDs = new List <int?>();

            // The id field is stored but not tokenized; one Field instance is reused for every document.
            FieldType customType = new FieldType(TextField.TYPE_STORED);

            customType.Tokenized = false;
            Field idField = NewField("id", "", customType);

            for (int i = 0; i < fieldCount; i++)
            {
                fieldIDs.Add(i);
            }

            // Expected state: id -> the Document that was handed to the writer.
            IDictionary <string, Document> docs = new Dictionary <string, Document>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: build index docCount=" + docCount);
            }

            // Payload fields are stored only.
            FieldType customType2 = new FieldType();

            customType2.Stored = true;
            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(idField);
                string id = "" + i;
                idField.StringValue = id;
                docs[id]            = doc;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: add doc id=" + id);
                }

                foreach (int field in fieldIDs)
                {
                    // Roughly one field in four is left out of the document.
                    if (rand.Next(4) != 3)
                    {
                        string value = TestUtil.RandomUnicodeString(rand, 1000);
                        doc.Add(NewField("f" + field, value, customType2));
                    }
                }
                w.AddDocument(doc);
                if (rand.Next(50) == 17)
                {
                    // mixup binding of field name -> Number every so often
                    fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
                }
                if (rand.Next(5) == 3 && i > 0)
                {
                    // Delete a random earlier id (it may already have been deleted).
                    string delID = "" + rand.Next(i);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: delete doc id=" + delID);
                    }
                    w.DeleteDocuments(new Term("id", delID));
                    docs.Remove(delID);
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
            }
            if (docs.Count > 0)
            {
                string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);

                // Two cycles: the second one runs against the force-merged index.
                for (int x = 0; x < 2; x++)
                {
                    IndexReader   r = w.Reader;
                    IndexSearcher s = NewSearcher(r);

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
                    }

                    int num = AtLeast(1000);
                    for (int iter = 0; iter < num; iter++)
                    {
                        string testID = idsList[rand.Next(idsList.Length)];
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: test id=" + testID);
                        }
                        TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                        Assert.AreEqual(1, hits.TotalHits);
                        Document doc    = r.Document(hits.ScoreDocs[0].Doc);
                        Document docExp = docs[testID];
                        for (int i = 0; i < fieldCount; i++)
                        {
                            // Argument order is (expected, actual, message); the message names
                            // the field actually being compared (index i, not fieldCount).
                            Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + i + " is wrong");
                        }
                    }
                    r.Dispose();
                    w.ForceMerge(1);
                }
            }
            w.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Builds a shuffled bag of postings in which the term <c>i</c> occurs <c>i</c> times,
        /// hands the bag and a shared writer to a random number of worker threads,
        /// force-merges down to one segment and asserts that each term's
        /// <c>DocFreq</c> equals the integer the term spells.
        /// </summary>
        public virtual void Test()
        {
            int  termCount      = AtLeast(300);
            int  maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool simpleText     = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if (simpleText || iwc.MergePolicy is MockRandomMergePolicy)
            {
                if (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)
                {
                    // Otherwise test can take way too long (> 2 hours)
                    termCount /= 2;
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + termCount);
            }

            // Term "i" is put into the bag i times, so term "0" never appears.
            IList<string> bag = new List<string>();
            for (int value = 0; value < termCount; value++)
            {
                string text = Convert.ToString(value);
                for (int copies = value; copies > 0; copies--)
                {
                    bag.Add(text);
                }
            }
            bag = CollectionsHelper.Shuffle(bag);

            var postings = new ConcurrentQueue<string>(bag);

            Directory dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            var iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            var workers = new ThreadClass[threadCount];
            var startingGun = new CountdownEvent(1);

            for (int slot = 0; slot < workers.Length; slot++)
            {
                ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
                workers[slot] = worker;
                worker.Start();
            }

            startingGun.Signal();
            for (int slot = 0; slot < workers.Length; slot++)
            {
                workers[slot].Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            var leaf = (AtomicReader)ir.Leaves[0].Reader;
            Terms terms = leaf.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(termCount - 1, leaf.Fields.UniqueTermCount);
            if (!(iwc.Codec is Lucene3xCodec))
            {
                // The per-field term count is only asserted when the codec is not Lucene3x.
                Assert.AreEqual(termCount - 1, terms.Size());
            }

            TermsEnum termsEnum = terms.Iterator(null);
            for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
            {
                int expectedDocFreq = Convert.ToInt32(current.Utf8ToString());
                Assert.AreEqual(expectedDocFreq, termsEnum.DocFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }

            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Esempio n. 23
0
        /// <summary>
        /// Samples seek targets from <paramref name="leftTerms"/>, then seeks both term
        /// dictionaries to each target (exact and ceiling, twice each) and asserts that
        /// the two sides answer identically.
        /// </summary>
        /// <param name="leftTerms">Terms the seek targets are drawn from; first side of the comparison.</param>
        /// <param name="rightTerms">Second side of the comparison.</param>
        private void AssertTermsSeeking(Terms leftTerms, Terms rightTerms)
        {
            TermsEnum leftEnum  = null;
            TermsEnum rightEnum = null;

            // just an upper bound
            int    wanted = AtLeast(20);
            Random rnd    = Random();

            // collect this number of terms from the left side, in at most 10 passes
            var targets = new HashSet<BytesRef>();

            for (int pass = 0; pass < 10 && targets.Count < wanted; pass++)
            {
                leftEnum = leftTerms.Iterator(leftEnum);
                for (BytesRef current = leftEnum.Next(); current != null; current = leftEnum.Next())
                {
                    switch (rnd.Next(10))
                    {
                        case 0:
                            // the term
                            targets.Add(BytesRef.DeepCopyOf(current));
                            break;

                        case 1:
                        {
                            // truncated subsequence of term
                            // NOTE(review): the truncated copy is built but never added to the
                            // target set -- confirm whether that is intended.
                            BytesRef truncated = BytesRef.DeepCopyOf(current);
                            if (truncated.Length > 0)
                            {
                                // truncate it
                                truncated.Length = rnd.Next(truncated.Length);
                            }
                            break;
                        }

                        case 2:
                        {
                            // term, but ensure a non-zero offset
                            var shifted = new byte[current.Length + 5];
                            Array.Copy(current.Bytes, current.Offset, shifted, 5, current.Length);
                            targets.Add(new BytesRef(shifted, 5, current.Length));
                            break;
                        }

                        default:
                            break;
                    }
                }
            }

            var ordered = (List<BytesRef>)CollectionsHelper.Shuffle(new List<BytesRef>(targets));

            for (int index = 0; index < ordered.Count; index++)
            {
                BytesRef target = ordered[index];

                leftEnum  = leftTerms.Iterator(leftEnum);
                rightEnum = rightTerms.Iterator(rightEnum);

                // Exact seek, performed twice in a row on the same enums.
                for (int round = 0; round < 2; round++)
                {
                    bool leftFound  = leftEnum.SeekExact(target);
                    bool rightFound = rightEnum.SeekExact(target);
                    Assert.AreEqual(leftFound, rightFound);
                }

                // Ceiling seek, also performed twice; the landed-on term is compared unless END.
                for (int round = 0; round < 2; round++)
                {
                    SeekStatus leftStatus  = leftEnum.SeekCeil(target);
                    SeekStatus rightStatus = rightEnum.SeekCeil(target);
                    Assert.AreEqual(leftStatus, rightStatus);
                    if (leftStatus != SeekStatus.END)
                    {
                        Assert.AreEqual(leftEnum.Term(), rightEnum.Term());
                    }
                }
            }
        }