예제 #1
0
        /// <summary>
        /// Builds the test document for <paramref name="id"/>. The document carries the id as a
        /// stored string field, a constant docs-enum term, the supplied positions token stream,
        /// numeric/binary/sorted (and, when the default codec supports it, sorted-set) doc values,
        /// a boosted norms field and a term-vectors field.
        /// </summary>
        /// <param name="id">Identifier written into every field of the document.</param>
        /// <param name="positions">Token stream indexed into <c>DOC_POSITIONS_FIELD</c>; its id is set to <paramref name="id"/> before use.</param>
        /// <returns>The populated document.</returns>
        private static Document Doc(int id, PositionsTokenStream positions)
        {
            // Format the id once, culture-invariantly, so the indexed text does not depend on
            // the regional settings of the machine running the test.
            string idText = id.ToString(System.Globalization.CultureInfo.InvariantCulture);

            Document doc = new Document();

            doc.Add(new StringField(ID_FIELD, idText, Field.Store.YES));
            doc.Add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Field.Store.NO));
            positions.SetId(id);
            if (DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
            {
                // codec doesn't support offsets: just index positions for the field
                doc.Add(new Field(DOC_POSITIONS_FIELD, positions, TextField.TYPE_NOT_STORED));
            }
            else
            {
                doc.Add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
            }
            doc.Add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));

            // The boost is derived from the id via Number.Int32BitsToSingle.
            TextField norms = new TextField(NORMS_FIELD, idText, Field.Store.NO);
            norms.Boost = Number.Int32BitsToSingle(id);
            doc.Add(norms);

            // Each doc-values field gets its own BytesRef instance, as in the original code.
            doc.Add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(idText)));
            doc.Add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(idText)));
            if (DefaultCodecSupportsSortedSet)
            {
                // Two values per document: id and id + 1.
                string nextIdText = (id + 1).ToString(System.Globalization.CultureInfo.InvariantCulture);
                doc.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(idText)));
                doc.Add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(nextIdText)));
            }
            doc.Add(new Field(TERM_VECTORS_FIELD, idText, TERM_VECTORS_TYPE));
            return doc;
        }
예제 #2
0
        // Checks the DocsAndPositionsEnum of DOC_POSITIONS_TERM twice: first by stepping through
        // every document with NextDoc(), then by re-obtaining the enum (passing the previous one
        // for reuse) and skipping forward with Advance() in random steps of 1..5 documents.
        public virtual void TestDocsAndPositionsEnum()
        {
            TermsEnum termsEnum = reader.GetTerms(DOC_POSITIONS_FIELD).GetIterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
            DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);

            // The codec lookup does not depend on the document or position, so do it once
            // instead of once per position.
            bool checkOffsets = !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD));
            int doc;

            // test nextDoc()
            while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                AssertPositionsOfCurrentDoc(sortedPositions, doc, checkOffsets);
            }

            // test advance()
            DocsAndPositionsEnum reuse = sortedPositions;

            sortedPositions = termsEnum.DocsAndPositions(null, reuse);
            if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
            }
            doc = 0;
            while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt32(Random, 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
            {
                AssertPositionsOfCurrentDoc(sortedPositions, doc, checkOffsets);
            }
        }

        /// <summary>
        /// Asserts freq, positions, (optionally) offsets and payloads of the document the enum is
        /// currently positioned on. Expected freq is <c>sortedValues[doc] / 10 + 1</c>; position
        /// <c>i</c> is expected to have start/end offset <c>i</c> and a payload that parses to
        /// <c>freq - i</c>.
        /// </summary>
        /// <param name="positionsEnum">Enum already positioned on <paramref name="doc"/>.</param>
        /// <param name="doc">Current document id, used for lookups and failure messages.</param>
        /// <param name="checkOffsets">Whether start/end offsets should be asserted.</param>
        private void AssertPositionsOfCurrentDoc(DocsAndPositionsEnum positionsEnum, int doc, bool checkOffsets)
        {
            int freq = positionsEnum.Freq;
            assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
            for (int i = 0; i < freq; i++)
            {
                assertEquals("incorrect position for doc=" + doc, i, positionsEnum.NextPosition());
                if (checkOffsets)
                {
                    assertEquals("incorrect startOffset for doc=" + doc, i, positionsEnum.StartOffset);
                    assertEquals("incorrect endOffset for doc=" + doc, i, positionsEnum.EndOffset);
                }
                assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(positionsEnum.GetPayload().Utf8ToString(), CultureInfo.InvariantCulture));
            }
        }
예제 #3
0
        // Indexes a shuffled multiset of postings from several threads, force-merges to a single
        // segment and checks that every term's totalTermFreq equals its numeric value: term "i"
        // is queued exactly i times, so term "0" never occurs.
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            // Ordinal comparison: the postings format name is an identifier, not linguistic text.
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                // Terms are parsed back into integers during verification below, so they are
                // formatted with the invariant culture to round-trip on any machine.
                string term = Convert.ToString(i, System.Globalization.CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList = CollectionsHelper.Shuffle(postingsList);

            // Work queue handed to every indexing thread.
            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                                  // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            // Signalled once after every thread has been started (see below).
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                // Each thread gets its own Random, Document and Field instance.
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Size());
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                // Parse with the same (invariant) culture the terms were written with.
                int value = Convert.ToInt32(termBR.Utf8ToString(), System.Globalization.CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.TotalTermFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
예제 #4
0
        // Multi-threaded postings test: term "i" is queued i times, the shuffled queue is
        // consumed by 1..5 indexing threads, and after a force-merge to one segment each
        // term's totalTermFreq must equal the integer the term spells.
        public virtual void Test()
        {
            IList<string> shuffledTerms = new JCG.List<string>();
            int numTerms = AtLeast(300);
            int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);
            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig config = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            bool slowConfiguration = isSimpleText || config.MergePolicy is MockRandomMergePolicy;
            if (slowConfiguration && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours).
                // LUCENENET specific - To keep this under the 1 hour free limit
                // of Azure DevOps, the divisor was raised from 2 to 6.
                numTerms /= 6;
            }

            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            // Queue term "n" exactly n times (so "0" is never queued).
            for (int termNumber = 0; termNumber < numTerms; termNumber++)
            {
                string termText = Convert.ToString(termNumber, CultureInfo.InvariantCulture);
                for (int copy = 0; copy < termNumber; copy++)
                {
                    shuffledTerms.Add(termText);
                }
            }
            shuffledTerms.Shuffle(Random);

            ConcurrentQueue<string> postings = new ConcurrentQueue<string>(shuffledTerms);
            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);
            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + writer.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            // Start from the default text field type and randomize norms / index options.
            Field prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType(prototype.FieldType);
            if (Random.NextBoolean())
            {
                fieldType.OmitNorms = true;
            }

            switch (Random.Next(3))
            {
                case 0:
                    // we dont actually need positions,
                    // but enforce term vectors when we do this so we check SOMETHING
                    fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                    fieldType.StoreTermVectors = true;
                    break;

                case 1:
                    if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
                    {
                        fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    break;

                default:
                    // just positions
                    break;
            }

            ThreadJob[] workers = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int index = 0; index < workers.Length; index++)
            {
                // Every worker owns its Random, Document and Field.
                Random workerRandom = new Random(Random.Next());
                Document workerDocument = new Document();
                Field workerField = new Field("field", "", fieldType);
                workerDocument.Add(workerField);

                ThreadJob worker = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, writer, startingGun, workerRandom, workerDocument, workerField);
                workers[index] = worker;
                worker.Start();
            }

            // Signal the shared event once all workers are started, then wait for them.
            startingGun.Signal();
            for (int index = 0; index < workers.Length; index++)
            {
                workers[index].Join();
            }

            writer.ForceMerge(1);
            DirectoryReader indexReader = writer.GetReader();
            Assert.AreEqual(1, indexReader.Leaves.Count);

            AtomicReader leaf = (AtomicReader)indexReader.Leaves[0].Reader;
            Terms terms = leaf.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);

            // Checking totalTermFreq is enough here: CheckIndex verifies that
            // totalTermFreq == total number of positions seen from a docsAndPositionsEnum.
            TermsEnum termsEnum = terms.GetEnumerator();
            while (termsEnum.MoveNext())
            {
                string termText = termsEnum.Term.Utf8ToString();
                int expectedFreq = Convert.ToInt32(termText, CultureInfo.InvariantCulture);
                Assert.AreEqual(expectedFreq, termsEnum.TotalTermFreq);
            }

            indexReader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
예제 #5
0
        /// <summary>
        /// Runs <paramref name="iterations"/> rounds of <c>CheckAnalysisConsistency</c> against
        /// <paramref name="a"/>, feeding it either a slice of a line-file document body (when
        /// <c>random.Next(10) == 7</c>) or a synthetic random analysis string. When
        /// <paramref name="iw"/> is non-null, a "dummy" field with a randomized field type is
        /// also indexed, occasionally piled up as a multi-valued field.
        /// </summary>
        /// <param name="random">Source of all random decisions.</param>
        /// <param name="a">Analyzer under test.</param>
        /// <param name="iterations">Number of texts to check.</param>
        /// <param name="maxWordLength">Maximum length of a text slice / synthetic string.</param>
        /// <param name="useCharFilter">Forwarded to <c>CheckAnalysisConsistency</c>.</param>
        /// <param name="simple">Forwarded to <c>TestUtil.RandomAnalysisString</c>.</param>
        /// <param name="offsetsAreCorrect">Forwarded to the consistency check; also gates indexing with offsets.</param>
        /// <param name="iw">Optional writer; when null nothing is indexed.</param>
        private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
        {
            LineFileDocs docs = new LineFileDocs(random);
            Document doc = null;
            Field field = null;
            Field currentField = null;
            StringReader bogus = new StringReader("");

            if (iw != null)
            {
                doc = new Document();
                FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);

                // Randomize term-vector settings.
                if (random.NextBoolean())
                {
                    fieldType.StoreTermVectors = true;
                    fieldType.StoreTermVectorOffsets = random.NextBoolean();
                    fieldType.StoreTermVectorPositions = random.NextBoolean();
                    if (fieldType.StoreTermVectorPositions && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE)
                    {
                        fieldType.StoreTermVectorPayloads = random.NextBoolean();
                    }
                }

                if (random.NextBoolean())
                {
                    fieldType.OmitNorms = true;
                }

                string postingsFormat = TestUtil.GetPostingsFormat("dummy");
                bool supportsOffsets = !DoesntSupportOffsets.Contains(postingsFormat);

                // Pick index options; offsets are only indexed when the codec supports them
                // and the caller says they are correct.
                int choice = random.Next(4);
                if (choice == 0)
                {
                    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                }
                else if (choice == 1)
                {
                    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                }
                else if (choice == 2 || !supportsOffsets || !offsetsAreCorrect)
                {
                    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                }
                else
                {
                    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                }

                field = new Field("dummy", bogus, fieldType);
                currentField = field;
                doc.Add(currentField);
            }

            try
            {
                for (int iteration = 0; iteration < iterations; iteration++)
                {
                    string text;

                    if (random.Next(10) != 7)
                    {
                        // synthetic
                        text = TestUtil.RandomAnalysisString(random, maxWordLength, simple);
                    }
                    else
                    {
                        // real data from linedocs
                        text = docs.NextDoc().Get("body");
                        if (text.Length > maxWordLength)
                        {
                            // Take a random slice from the text, taking care not to split
                            // up a surrogate pair at either end.
                            int sliceStart = random.Next(text.Length - maxWordLength);
                            if (sliceStart > 0 && char.IsLowSurrogate(text[sliceStart]))
                            {
                                sliceStart--;
                                Assert.True(char.IsHighSurrogate(text[sliceStart]));
                            }

                            int sliceEnd = sliceStart + maxWordLength - 1;
                            if (char.IsHighSurrogate(text[sliceEnd]))
                            {
                                sliceEnd--;
                            }

                            text = text.Substring(sliceStart, sliceEnd - sliceStart + 1);
                        }
                    }

                    try
                    {
                        CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
                        if (iw != null)
                        {
                            bool pileUp = random.Next(7) == 0;
                            if (pileUp)
                            {
                                // pile up a multivalued field
                                currentField = new Field("dummy", bogus, (FieldType)field.FieldType());
                                doc.Add(currentField);
                            }
                            else
                            {
                                iw.AddDocument(doc);
                                if (doc.Fields.Count > 1)
                                {
                                    // back to 1 field
                                    currentField = field;
                                    doc.RemoveFields("dummy");
                                    doc.Add(currentField);
                                }
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // TODO: really we should pass a random seed to
                        // checkAnalysisConsistency then print it here too:
                        Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'");
                        throw;
                    }
                }
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(docs);
            }
        }