コード例 #1
0
            protected internal RandomDocument(BaseTermVectorsFormatTestCase baseTermVectorsFormaTestCase, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                if (fieldCount > fieldNames.Length)
                {
                    throw new ArgumentException();
                }
                this.fieldNames = new string[fieldCount];
                fieldTypes      = new FieldType[fieldCount];
                tokenStreams    = new RandomTokenStream[fieldCount];
                Arrays.Fill(fieldTypes, baseTermVectorsFormaTestCase.FieldType(options));
                ISet <string> usedFileNames = new JCG.HashSet <string>();

                for (int i = 0; i < fieldCount; ++i)
                {
                    // LUCENENET NOTE: Using a simple Linq query to filter rather than using brute force makes this a lot
                    // faster (and won't infinitely retry due to poor random distribution).
                    this.fieldNames[i] = RandomPicks.RandomFrom(Random, fieldNames.Except(usedFileNames).ToArray());
                    //do
                    //{
                    //    this.FieldNames[i] = RandomPicks.RandomFrom(Random(), fieldNames);
                    //} while (usedFileNames.Contains(this.FieldNames[i]));

                    usedFileNames.Add(this.fieldNames[i]);
                    tokenStreams[i] = new RandomTokenStream(baseTermVectorsFormaTestCase, TestUtil.NextInt32(Random, 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
コード例 #2
0
            protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                this.OuterInstance = outerInstance;
                if (fieldCount > fieldNames.Length)
                {
                    throw new System.ArgumentException();
                }
                this.FieldNames = new string[fieldCount];
                FieldTypes      = new FieldType[fieldCount];
                TokenStreams    = new RandomTokenStream[fieldCount];
                Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
                HashSet <string> usedFileNames = new HashSet <string>();

                for (int i = 0; i < fieldCount; ++i)
                {
                    do
                    {
                        this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames);
                    } while (usedFileNames.Contains(this.FieldNames[i]));
                    usedFileNames.Add(this.FieldNames[i]);
                    TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
コード例 #3
0
        protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = (new HashSet <string>(Arrays.AsList(tk.Terms))).Count;

            Assert.AreEqual(termCount, terms.Size());
            Assert.AreEqual(termCount, terms.SumDocFreq);
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions());
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets());
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads());
            HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>();

            foreach (string term in tk.Freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparator);
            TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                BytesRef nextTerm = termsEnum.Next();
                Assert.AreEqual(sortedTerms[i], nextTerm);
                Assert.AreEqual(sortedTerms[i], termsEnum.Term());
                Assert.AreEqual(1, termsEnum.DocFreq());

                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID());
                Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq());
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions() || terms.HasOffsets())
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq();
                    Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int         position = docsAndPositionsEnum.NextPosition();
                            ISet <int?> indexes;
                            if (terms.HasPositions())
                            {
                                indexes = tk.PositionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions())
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets())
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset())
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads())
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e)
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsNull(termsEnum.Next());
            for (int i = 0; i < 5; ++i)
            {
                if (Random().NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
            }
        }
コード例 #4
0
        protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = (new HashSet<string>(Arrays.AsList(tk.Terms))).Count;
            Assert.AreEqual(termCount, terms.Size());
            Assert.AreEqual(termCount, terms.SumDocFreq);
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions());
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets());
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads());
            HashSet<BytesRef> uniqueTerms = new HashSet<BytesRef>();
            foreach (string term in tk.Freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparator);
            TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value);
            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                BytesRef nextTerm = termsEnum.Next();
                Assert.AreEqual(sortedTerms[i], nextTerm);
                Assert.AreEqual(sortedTerms[i], termsEnum.Term());
                Assert.AreEqual(1, termsEnum.DocFreq());

                FixedBitSet bits = new FixedBitSet(1);
                DocsEnum docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID());
                Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq());
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions() || terms.HasOffsets())
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq();
                    Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int position = docsAndPositionsEnum.NextPosition();
                            ISet<int?> indexes;
                            if (terms.HasPositions())
                            {
                                indexes = tk.PositionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions())
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets())
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset())
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads())
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e)
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsNull(termsEnum.Next());
            for (int i = 0; i < 5; ++i)
            {
                if (Random().NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
            }
        }
コード例 #5
0
            protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                this.OuterInstance = outerInstance;
                if (fieldCount > fieldNames.Length)
                {
                    throw new System.ArgumentException();
                }
                this.FieldNames = new string[fieldCount];
                FieldTypes = new FieldType[fieldCount];
                TokenStreams = new RandomTokenStream[fieldCount];
                Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
                HashSet<string> usedFileNames = new HashSet<string>();
                for (int i = 0; i < fieldCount; ++i)
                {
                    // LUCENENET NOTE: Using a simple Linq query to filter rather than using brute force makes this a lot
                    // faster (and won't infinitely retry due to poor random distribution).
                    this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames.Except(usedFileNames).ToArray());
                    //do
                    //{
                    //    this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames);
                    //} while (usedFileNames.Contains(this.FieldNames[i]));

                    usedFileNames.Add(this.FieldNames[i]);
                    TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
コード例 #6
0
 protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
 {
     this.OuterInstance = outerInstance;
     if (fieldCount > fieldNames.Length)
     {
         throw new System.ArgumentException();
     }
     this.FieldNames = new string[fieldCount];
     FieldTypes = new FieldType[fieldCount];
     TokenStreams = new RandomTokenStream[fieldCount];
     Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
     HashSet<string> usedFileNames = new HashSet<string>();
     for (int i = 0; i < fieldCount; ++i)
     {
         do
         {
             this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames);
         } while (usedFileNames.Contains(this.FieldNames[i]));
         usedFileNames.Add(this.FieldNames[i]);
         TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
     }
 }
コード例 #7
0
        protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = new JCG.HashSet <string>(tk.terms).Count;

            Assert.AreEqual((long)termCount, terms.Count);      // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
            ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();

            foreach (string term in tk.freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparer);
            TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                Assert.AreEqual(sortedTerms[i], termsEnum.Term);
                Assert.AreEqual(1, termsEnum.DocFreq);

                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID);
                Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq;
                    Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int        position = docsAndPositionsEnum.NextPosition();
                            ISet <int> indexes;
                            if (terms.HasPositions)
                            {
                                indexes = tk.positionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions)
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets)
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads)
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e) when(e.IsException())
                        {
                            // ok
                        }
                        catch (Exception e) when(e.IsAssertionError())
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsFalse(termsEnum.MoveNext());
            for (int i = 0; i < 5; ++i)
            {
                if (Random.NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
            }
        }