Base class for testing TermVectorsFormat implementations. To test a new format, register a new Codec that uses it, extend this class, and override #getCodec(). @lucene.experimental
            /// <summary>
            /// Creates a random document made of <paramref name="fieldCount"/> distinct field names picked from
            /// <paramref name="fieldNames"/>, with one random token stream per field and a single field type
            /// (obtained from <paramref name="options"/>) stored for every field.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; used to build the field type and passed on to each token stream.</param>
            /// <param name="fieldCount">Number of fields to generate.</param>
            /// <param name="maxTermCount">Upper bound handed to <c>TestUtil.NextInt</c> when choosing each token stream's length.</param>
            /// <param name="options">Term vector options handed to <c>outerInstance.FieldType</c>.</param>
            /// <param name="fieldNames">Pool of candidate field names.</param>
            /// <param name="sampleTerms">Pool of terms forwarded to each token stream.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, forwarded to each token stream.</param>
            /// <exception cref="System.ArgumentException">
            /// <paramref name="fieldCount"/> is greater than the number of entries in <paramref name="fieldNames"/>.
            /// </exception>
            protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                this.OuterInstance = outerInstance;
                if (fieldCount > fieldNames.Length)
                {
                    throw new System.ArgumentException("fieldCount (" + fieldCount + ") must not exceed the number of available field names (" + fieldNames.Length + ").", "fieldCount");
                }
                this.FieldNames = new string[fieldCount];
                FieldTypes      = new FieldType[fieldCount];
                TokenStreams    = new RandomTokenStream[fieldCount];
                // The value returned by FieldType(options) is evaluated once and stored in every slot.
                Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
                HashSet <string> usedFileNames = new HashSet <string>();

                for (int i = 0; i < fieldCount; ++i)
                {
                    // LUCENENET NOTE: Picking from the names that are still unused (instead of retrying until an
                    // unused name happens to be drawn) is a lot faster and cannot retry endlessly when the
                    // random distribution is poor.
                    this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames.Except(usedFileNames).ToArray());

                    usedFileNames.Add(this.FieldNames[i]);
                    TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
Exemplo n.º 2
0
 /// <summary>
 /// Stores the supplied arguments in the corresponding fields of this instance.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="numDocs">Value kept in <c>numDocs</c>.</param>
 /// <param name="docs">Documents kept in <c>docs</c>.</param>
 /// <param name="reader">Index reader kept in <c>reader</c>.</param>
 /// <param name="exception">Shared exception reference kept in <c>exception</c>.</param>
 public ThreadAnonymousClass(BaseTermVectorsFormatTestCase outerInstance, int numDocs, RandomDocument[] docs, IndexReader reader, AtomicReference <Exception> exception)
 {
     // Plain field capture; no other work happens here.
     this.exception = exception;
     this.reader = reader;
     this.docs = docs;
     this.numDocs = numDocs;
     this.outerInstance = outerInstance;
 }
 /// <summary>
 /// Stores the supplied arguments in the corresponding members of this instance.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="numDocs">Value kept in <c>NumDocs</c>.</param>
 /// <param name="docs">Documents kept in <c>Docs</c>.</param>
 /// <param name="reader">Index reader kept in <c>Reader</c>.</param>
 /// <param name="exception">Shared exception holder kept in <c>ARException</c>.</param>
 /// <param name="i">Value kept in <c>i</c>.</param>
 public ThreadAnonymousInnerClassHelper(BaseTermVectorsFormatTestCase outerInstance, int numDocs, Lucene.Net.Index.BaseTermVectorsFormatTestCase.RandomDocument[] docs, IndexReader reader, AtomicObject <Exception> exception, int i)
 {
     // Plain member capture; no other work happens here.
     this.i = i;
     this.ARException = exception;
     this.Reader = reader;
     this.Docs = docs;
     this.NumDocs = numDocs;
     this.OuterInstance = outerInstance;
 }
            /// <summary>
            /// Prepares the pools this factory draws from: <paramref name="distinctFieldNames"/> unique random
            /// field names (never the name "id") and <paramref name="disctinctTerms"/> random terms together
            /// with their <c>BytesRef</c> form.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case.</param>
            /// <param name="distinctFieldNames">Number of unique field names to generate.</param>
            /// <param name="disctinctTerms">Number of terms to generate.</param>
            protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase outerInstance, int distinctFieldNames, int disctinctTerms)
            {
                this.OuterInstance = outerInstance;

                // Keep drawing names until the set holds the requested number of unique entries.
                HashSet<string> uniqueNames = new HashSet<string>();
                while (uniqueNames.Count < distinctFieldNames)
                {
                    string candidate = TestUtil.RandomSimpleString(Random());
                    uniqueNames.Add(candidate);
                    // "id" is never allowed to stay in the pool, so drop it if it was just drawn.
                    uniqueNames.Remove("id");
                }
                this.FieldNames = uniqueNames.ToArray();

                Terms = new string[disctinctTerms];
                TermBytes = new BytesRef[disctinctTerms];
                int slot = 0;
                while (slot < disctinctTerms)
                {
                    Terms[slot] = TestUtil.RandomRealisticUnicodeString(Random());
                    TermBytes[slot] = new BytesRef(Terms[slot]);
                    ++slot;
                }
            }
            /// <summary>
            /// Creates a random document made of <paramref name="fieldCount"/> distinct field names picked from
            /// <paramref name="fieldNames"/>, with one random token stream per field and a single field type
            /// (obtained from <paramref name="options"/>) stored for every field.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; used to build the field type and passed on to each token stream.</param>
            /// <param name="fieldCount">Number of fields to generate.</param>
            /// <param name="maxTermCount">Upper bound handed to <c>TestUtil.NextInt</c> when choosing each token stream's length.</param>
            /// <param name="options">Term vector options handed to <c>outerInstance.FieldType</c>.</param>
            /// <param name="fieldNames">Pool of candidate field names.</param>
            /// <param name="sampleTerms">Pool of terms forwarded to each token stream.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, forwarded to each token stream.</param>
            /// <exception cref="System.ArgumentException">
            /// <paramref name="fieldCount"/> is greater than the number of entries in <paramref name="fieldNames"/>.
            /// </exception>
            protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                this.OuterInstance = outerInstance;
                if (fieldCount > fieldNames.Length)
                {
                    throw new System.ArgumentException("fieldCount (" + fieldCount + ") must not exceed the number of available field names (" + fieldNames.Length + ").", "fieldCount");
                }
                this.FieldNames = new string[fieldCount];
                FieldTypes      = new FieldType[fieldCount];
                TokenStreams    = new RandomTokenStream[fieldCount];
                // The value returned by FieldType(options) is evaluated once and stored in every slot.
                Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
                HashSet <string> usedFileNames = new HashSet <string>();

                for (int i = 0; i < fieldCount; ++i)
                {
                    // Redraw until the name is new. HashSet.Add returns false for a name that is already
                    // present, so it serves as both the membership test and the insertion.
                    do
                    {
                        this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames);
                    } while (!usedFileNames.Add(this.FieldNames[i]));
                    TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
            /// <summary>
            /// Generates a random stream of <paramref name="len"/> tokens whose terms are sampled from
            /// <paramref name="sampleTerms"/>, together with random position increments, offsets and payloads,
            /// then builds the lookup tables derived from them (position to token indexes, start offset to
            /// token indexes, term to frequency) and registers the token attributes.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; supplies the random payloads.</param>
            /// <param name="len">Number of tokens to generate.</param>
            /// <param name="sampleTerms">Pool of term strings to sample from.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, read at the same index as <paramref name="sampleTerms"/>.</param>
            /// <param name="offsetsGoBackwards">
            /// When true, start and end offsets are independent random ints; otherwise each start offset is the
            /// previous one plus a random increment and each end offset is its start offset plus a random increment.
            /// </param>
            protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
            {
                this.OuterInstance = outerInstance;
                Terms = new string[len];
                TermBytes = new BytesRef[len];
                PositionsIncrements = new int[len];
                Positions = new int[len];
                StartOffsets = new int[len];
                EndOffsets = new int[len];
                Payloads = new BytesRef[len];
                for (int i = 0; i < len; ++i)
                {
                    int o = Random().Next(sampleTerms.Length);
                    Terms[i] = sampleTerms[o];
                    TermBytes[i] = sampleTermBytes[o];
                    // The first token gets an increment of at least 1; later tokens may get 0.
                    PositionsIncrements[i] = TestUtil.NextInt(Random(), i == 0 ? 1 : 0, 10);
                    if (offsetsGoBackwards)
                    {
                        StartOffsets[i] = Random().Next();
                        EndOffsets[i] = Random().Next();
                    }
                    else
                    {
                        if (i == 0)
                        {
                            StartOffsets[i] = TestUtil.NextInt(Random(), 0, 1 << 16);
                        }
                        else
                        {
                            StartOffsets[i] = StartOffsets[i - 1] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 16 : 20);
                        }
                        EndOffsets[i] = StartOffsets[i] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 10 : 20);
                    }
                }

                // Positions are the running sum of the increments, shifted so the first one is increment - 1.
                for (int i = 0; i < len; ++i)
                {
                    if (i == 0)
                    {
                        Positions[i] = PositionsIncrements[i] - 1;
                    }
                    else
                    {
                        Positions[i] = Positions[i - 1] + PositionsIncrements[i];
                    }
                }

                if (Rarely())
                {
                    // One payload value is generated and stored in every slot.
                    Arrays.Fill(Payloads, outerInstance.RandomPayload());
                }
                else
                {
                    for (int i = 0; i < len; ++i)
                    {
                        Payloads[i] = outerInstance.RandomPayload();
                    }
                }

                PositionToTerms = new Dictionary<int?, ISet<int?>>(len);
                StartOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
                for (int i = 0; i < len; ++i)
                {
                    // TryGetValue finds or creates each bucket with a single lookup on the common path.
                    ISet<int?> atPosition;
                    if (!PositionToTerms.TryGetValue(Positions[i], out atPosition))
                    {
                        atPosition = new HashSet<int?>();
                        PositionToTerms[Positions[i]] = atPosition;
                    }
                    atPosition.Add(i);

                    ISet<int?> atStartOffset;
                    if (!StartOffsetToTerms.TryGetValue(StartOffsets[i], out atStartOffset))
                    {
                        atStartOffset = new HashSet<int?>();
                        StartOffsetToTerms[StartOffsets[i]] = atStartOffset;
                    }
                    atStartOffset.Add(i);
                }

                Freqs = new Dictionary<string, int?>();
                foreach (string term in Terms)
                {
                    int? seen;
                    Freqs[term] = Freqs.TryGetValue(term, out seen) ? seen + 1 : 1;
                }

                AddAttributeImpl(new PermissiveOffsetAttributeImpl());

                TermAtt = AddAttribute<ICharTermAttribute>();
                PiAtt = AddAttribute<IPositionIncrementAttribute>();
                OAtt = AddAttribute<IOffsetAttribute>();
                PAtt = AddAttribute<IPayloadAttribute>();
            }
 /// <summary>
 /// Convenience overload that delegates to the five-argument constructor, passing the result of
 /// <c>Rarely()</c> as the <c>offsetsGoBackwards</c> argument.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="len">Number of tokens to generate.</param>
 /// <param name="sampleTerms">Pool of term strings to sample from.</param>
 /// <param name="sampleTermBytes">Byte form of the sample terms.</param>
 protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes)
     : this(outerInstance, len, sampleTerms, sampleTermBytes, Rarely())
 {
     // Nothing to do here: the chained constructor already stores outerInstance.
 }
 /// <summary>
 /// Stores the supplied arguments in the corresponding members of this instance.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="numDocs">Value kept in <c>NumDocs</c>.</param>
 /// <param name="docs">Documents kept in <c>Docs</c>.</param>
 /// <param name="reader">Index reader kept in <c>Reader</c>.</param>
 /// <param name="exception">Shared exception holder kept in <c>ARException</c>.</param>
 /// <param name="i">Value kept in <c>i</c>.</param>
 public ThreadAnonymousInnerClassHelper(BaseTermVectorsFormatTestCase outerInstance, int numDocs, Lucene.Net.Index.BaseTermVectorsFormatTestCase.RandomDocument[] docs, IndexReader reader, AtomicObject<Exception> exception, int i)
 {
     // Plain member capture; no other work happens here.
     this.i = i;
     this.ARException = exception;
     this.Reader = reader;
     this.Docs = docs;
     this.NumDocs = numDocs;
     this.OuterInstance = outerInstance;
 }
 /// <summary>
 /// Prepares the pools this factory draws from: <paramref name="distinctFieldNames"/> unique random
 /// field names (never the name "id") and <paramref name="disctinctTerms"/> random terms together
 /// with their <c>BytesRef</c> form.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="distinctFieldNames">Number of unique field names to generate.</param>
 /// <param name="disctinctTerms">Number of terms to generate.</param>
 protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase outerInstance, int distinctFieldNames, int disctinctTerms)
 {
     this.OuterInstance = outerInstance;

     // Keep drawing names until the set holds the requested number of unique entries.
     HashSet<string> uniqueNames = new HashSet<string>();
     while (uniqueNames.Count < distinctFieldNames)
     {
         string candidate = TestUtil.RandomSimpleString(Random());
         uniqueNames.Add(candidate);
         // "id" is never allowed to stay in the pool, so drop it if it was just drawn.
         uniqueNames.Remove("id");
     }
     this.FieldNames = uniqueNames.ToArray();

     Terms = new string[disctinctTerms];
     TermBytes = new BytesRef[disctinctTerms];
     int slot = 0;
     while (slot < disctinctTerms)
     {
         Terms[slot] = TestUtil.RandomRealisticUnicodeString(Random());
         TermBytes[slot] = new BytesRef(Terms[slot]);
         ++slot;
     }
 }
            /// <summary>
            /// Creates a random document made of <paramref name="fieldCount"/> distinct field names picked from
            /// <paramref name="fieldNames"/>, with one random token stream per field and a single field type
            /// (obtained from <paramref name="options"/>) stored for every field.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; used to build the field type and passed on to each token stream.</param>
            /// <param name="fieldCount">Number of fields to generate.</param>
            /// <param name="maxTermCount">Upper bound handed to <c>TestUtil.NextInt</c> when choosing each token stream's length.</param>
            /// <param name="options">Term vector options handed to <c>outerInstance.FieldType</c>.</param>
            /// <param name="fieldNames">Pool of candidate field names.</param>
            /// <param name="sampleTerms">Pool of terms forwarded to each token stream.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, forwarded to each token stream.</param>
            /// <exception cref="System.ArgumentException">
            /// <paramref name="fieldCount"/> is greater than the number of entries in <paramref name="fieldNames"/>.
            /// </exception>
            protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
            {
                this.OuterInstance = outerInstance;
                if (fieldCount > fieldNames.Length)
                {
                    throw new System.ArgumentException("fieldCount (" + fieldCount + ") must not exceed the number of available field names (" + fieldNames.Length + ").", "fieldCount");
                }
                this.FieldNames = new string[fieldCount];
                FieldTypes = new FieldType[fieldCount];
                TokenStreams = new RandomTokenStream[fieldCount];
                // The value returned by FieldType(options) is evaluated once and stored in every slot.
                Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
                HashSet<string> usedFileNames = new HashSet<string>();
                for (int i = 0; i < fieldCount; ++i)
                {
                    // LUCENENET NOTE: Picking from the names that are still unused (instead of retrying until an
                    // unused name happens to be drawn) is a lot faster and cannot retry endlessly when the
                    // random distribution is poor.
                    this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames.Except(usedFileNames).ToArray());

                    usedFileNames.Add(this.FieldNames[i]);
                    TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
                }
            }
            /// <summary>
            /// Generates a random stream of <paramref name="len"/> tokens whose terms are sampled from
            /// <paramref name="sampleTerms"/>, together with random position increments, offsets and payloads,
            /// then builds the lookup tables derived from them (position to token indexes, start offset to
            /// token indexes, term to frequency) and registers the token attributes.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; supplies the random payloads.</param>
            /// <param name="len">Number of tokens to generate.</param>
            /// <param name="sampleTerms">Pool of term strings to sample from.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, read at the same index as <paramref name="sampleTerms"/>.</param>
            /// <param name="offsetsGoBackwards">
            /// When true, start and end offsets are independent random ints; otherwise each start offset is the
            /// previous one plus a random increment and each end offset is its start offset plus a random increment.
            /// </param>
            protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
            {
                this.OuterInstance = outerInstance;
                Terms = new string[len];
                TermBytes = new BytesRef[len];
                PositionsIncrements = new int[len];
                Positions = new int[len];
                StartOffsets = new int[len];
                EndOffsets = new int[len];
                Payloads = new BytesRef[len];
                for (int i = 0; i < len; ++i)
                {
                    int o = Random().Next(sampleTerms.Length);
                    Terms[i] = sampleTerms[o];
                    TermBytes[i] = sampleTermBytes[o];
                    // The first token gets an increment of at least 1; later tokens may get 0.
                    PositionsIncrements[i] = TestUtil.NextInt(Random(), i == 0 ? 1 : 0, 10);
                    if (offsetsGoBackwards)
                    {
                        StartOffsets[i] = Random().Next();
                        EndOffsets[i] = Random().Next();
                    }
                    else
                    {
                        if (i == 0)
                        {
                            StartOffsets[i] = TestUtil.NextInt(Random(), 0, 1 << 16);
                        }
                        else
                        {
                            StartOffsets[i] = StartOffsets[i - 1] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 16 : 20);
                        }
                        EndOffsets[i] = StartOffsets[i] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 10 : 20);
                    }
                }

                // Positions are the running sum of the increments, shifted so the first one is increment - 1.
                for (int i = 0; i < len; ++i)
                {
                    if (i == 0)
                    {
                        Positions[i] = PositionsIncrements[i] - 1;
                    }
                    else
                    {
                        Positions[i] = Positions[i - 1] + PositionsIncrements[i];
                    }
                }

                if (Rarely())
                {
                    // One payload value is generated and stored in every slot.
                    Arrays.Fill(Payloads, outerInstance.RandomPayload());
                }
                else
                {
                    for (int i = 0; i < len; ++i)
                    {
                        Payloads[i] = outerInstance.RandomPayload();
                    }
                }

                PositionToTerms = new Dictionary<int?, ISet<int?>>(len);
                StartOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
                for (int i = 0; i < len; ++i)
                {
                    // TryGetValue finds or creates each bucket with a single lookup on the common path.
                    ISet<int?> atPosition;
                    if (!PositionToTerms.TryGetValue(Positions[i], out atPosition))
                    {
                        atPosition = new HashSet<int?>();
                        PositionToTerms[Positions[i]] = atPosition;
                    }
                    atPosition.Add(i);

                    ISet<int?> atStartOffset;
                    if (!StartOffsetToTerms.TryGetValue(StartOffsets[i], out atStartOffset))
                    {
                        atStartOffset = new HashSet<int?>();
                        StartOffsetToTerms[StartOffsets[i]] = atStartOffset;
                    }
                    atStartOffset.Add(i);
                }

                Freqs = new Dictionary<string, int?>();
                foreach (string term in Terms)
                {
                    int? seen;
                    Freqs[term] = Freqs.TryGetValue(term, out seen) ? seen + 1 : 1;
                }

                AddAttributeImpl(new PermissiveOffsetAttributeImpl());

                TermAtt = AddAttribute<ICharTermAttribute>();
                PiAtt = AddAttribute<IPositionIncrementAttribute>();
                OAtt = AddAttribute<IOffsetAttribute>();
                PAtt = AddAttribute<IPayloadAttribute>();
            }
 /// <summary>
 /// Convenience overload that delegates to the five-argument constructor, passing the result of
 /// <c>Rarely()</c> as the <c>offsetsGoBackwards</c> argument.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case.</param>
 /// <param name="len">Number of tokens to generate.</param>
 /// <param name="sampleTerms">Pool of term strings to sample from.</param>
 /// <param name="sampleTermBytes">Byte form of the sample terms.</param>
 protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes)
     : this(outerInstance, len, sampleTerms, sampleTermBytes, Rarely())
 {
     // Nothing to do here: the chained constructor already stores outerInstance.
 }
 /// <summary>
 /// Creates a random document made of <paramref name="fieldCount"/> distinct field names picked from
 /// <paramref name="fieldNames"/>, with one random token stream per field and a single field type
 /// (obtained from <paramref name="options"/>) stored for every field.
 /// </summary>
 /// <param name="outerInstance">Enclosing test case; used to build the field type and passed on to each token stream.</param>
 /// <param name="fieldCount">Number of fields to generate.</param>
 /// <param name="maxTermCount">Upper bound handed to <c>TestUtil.NextInt</c> when choosing each token stream's length.</param>
 /// <param name="options">Term vector options handed to <c>outerInstance.FieldType</c>.</param>
 /// <param name="fieldNames">Pool of candidate field names.</param>
 /// <param name="sampleTerms">Pool of terms forwarded to each token stream.</param>
 /// <param name="sampleTermBytes">Byte form of the sample terms, forwarded to each token stream.</param>
 /// <exception cref="System.ArgumentException">
 /// <paramref name="fieldCount"/> is greater than the number of entries in <paramref name="fieldNames"/>.
 /// </exception>
 protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
 {
     this.OuterInstance = outerInstance;
     if (fieldCount > fieldNames.Length)
     {
         throw new System.ArgumentException("fieldCount (" + fieldCount + ") must not exceed the number of available field names (" + fieldNames.Length + ").", "fieldCount");
     }
     this.FieldNames = new string[fieldCount];
     FieldTypes = new FieldType[fieldCount];
     TokenStreams = new RandomTokenStream[fieldCount];
     // The value returned by FieldType(options) is evaluated once and stored in every slot.
     Arrays.Fill(FieldTypes, outerInstance.FieldType(options));
     HashSet<string> usedFileNames = new HashSet<string>();
     for (int i = 0; i < fieldCount; ++i)
     {
         // Redraw until the name is new. HashSet.Add returns false for a name that is already
         // present, so it serves as both the membership test and the insertion.
         do
         {
             this.FieldNames[i] = RandomInts.RandomFrom(Random(), fieldNames);
         } while (!usedFileNames.Add(this.FieldNames[i]));
         TokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt(Random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
     }
 }
            /// <summary>
            /// Generates a random stream of <paramref name="len"/> tokens whose terms are sampled from
            /// <paramref name="sampleTerms"/>, together with random position increments, offsets and payloads,
            /// then builds the lookup tables derived from them (position to token indexes, start offset to
            /// token indexes, term to frequency) and registers the token attributes.
            /// </summary>
            /// <param name="outerInstance">Enclosing test case; supplies the random payloads.</param>
            /// <param name="len">Number of tokens to generate.</param>
            /// <param name="sampleTerms">Pool of term strings to sample from.</param>
            /// <param name="sampleTermBytes">Byte form of the sample terms, read at the same index as <paramref name="sampleTerms"/>.</param>
            /// <param name="offsetsGoBackwards">
            /// When true, start and end offsets are independent random ints; otherwise each start offset is the
            /// previous one plus a random increment and each end offset is its start offset plus a random increment.
            /// </param>
            protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
            {
                this.outerInstance = outerInstance;
                terms = new string[len];
                termBytes = new BytesRef[len];
                positionsIncrements = new int[len];
                positions = new int[len];
                startOffsets = new int[len];
                endOffsets = new int[len];
                payloads = new BytesRef[len];
                for (int i = 0; i < len; ++i)
                {
                    int o = Random.Next(sampleTerms.Length);
                    terms[i] = sampleTerms[o];
                    termBytes[i] = sampleTermBytes[o];
                    // The first token gets an increment of at least 1; later tokens may get 0.
                    positionsIncrements[i] = TestUtil.NextInt32(Random, i == 0 ? 1 : 0, 10);
                    if (offsetsGoBackwards)
                    {
                        startOffsets[i] = Random.Next();
                        endOffsets[i] = Random.Next();
                    }
                    else
                    {
                        if (i == 0)
                        {
                            startOffsets[i] = TestUtil.NextInt32(Random, 0, 1 << 16);
                        }
                        else
                        {
                            startOffsets[i] = startOffsets[i - 1] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 16 : 20);
                        }
                        endOffsets[i] = startOffsets[i] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 10 : 20);
                    }
                }

                // Positions are the running sum of the increments, shifted so the first one is increment - 1.
                for (int i = 0; i < len; ++i)
                {
                    if (i == 0)
                    {
                        positions[i] = positionsIncrements[i] - 1;
                    }
                    else
                    {
                        positions[i] = positions[i - 1] + positionsIncrements[i];
                    }
                }

                if (Rarely())
                {
                    // One payload value is generated and stored in every slot.
                    Arrays.Fill(payloads, outerInstance.RandomPayload());
                }
                else
                {
                    for (int i = 0; i < len; ++i)
                    {
                        payloads[i] = outerInstance.RandomPayload();
                    }
                }

                positionToTerms = new Dictionary<int?, ISet<int?>>(len);
                startOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
                for (int i = 0; i < len; ++i)
                {
                    // TryGetValue finds or creates each bucket with a single lookup on the common path.
                    ISet<int?> atPosition;
                    if (!positionToTerms.TryGetValue(positions[i], out atPosition))
                    {
                        atPosition = new HashSet<int?>();
                        positionToTerms[positions[i]] = atPosition;
                    }
                    atPosition.Add(i);

                    ISet<int?> atStartOffset;
                    if (!startOffsetToTerms.TryGetValue(startOffsets[i], out atStartOffset))
                    {
                        atStartOffset = new HashSet<int?>();
                        startOffsetToTerms[startOffsets[i]] = atStartOffset;
                    }
                    atStartOffset.Add(i);
                }

                freqs = new Dictionary<string, int?>();
                foreach (string term in terms)
                {
                    int? seen;
                    freqs[term] = freqs.TryGetValue(term, out seen) ? seen + 1 : 1;
                }

                AddAttributeImpl(new PermissiveOffsetAttribute());

                termAtt = AddAttribute<ICharTermAttribute>();
                piAtt = AddAttribute<IPositionIncrementAttribute>();
                oAtt = AddAttribute<IOffsetAttribute>();
                pAtt = AddAttribute<IPayloadAttribute>();
            }
Exemplo n.º 15
0
 /// <summary>
 /// Convenience overload that delegates to the five-argument constructor, passing the result of
 /// <c>Rarely()</c> as the final argument.
 /// </summary>
 /// <param name="baseTermVectorsFormatTestCase">Enclosing test case, forwarded unchanged.</param>
 /// <param name="len">Number of tokens to generate, forwarded unchanged.</param>
 /// <param name="sampleTerms">Pool of term strings, forwarded unchanged.</param>
 /// <param name="sampleTermBytes">Byte form of the sample terms, forwarded unchanged.</param>
 protected internal RandomTokenStream(
     BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase,
     int len,
     string[] sampleTerms,
     BytesRef[] sampleTermBytes)
     : this(
         baseTermVectorsFormatTestCase,
         len,
         sampleTerms,
         sampleTermBytes,
         Rarely())
 {
     // All initialization happens in the chained constructor.
 }