{@code AtomicReader} is an abstract class providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, so that any subclass that implements it is searchable. IndexReaders implemented by this subclass do not consist of several sub-readers; they are atomic. They support retrieval of stored fields, doc values, terms, and postings.

For efficiency, in this API documents are often referred to via document numbers: non-negative integers, each of which names a unique document in the index. These document numbers are ephemeral -- they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions.

NOTE: {@link IndexReader} instances are completely thread-safe, meaning multiple threads can call any of their methods concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader instance; use your own (non-Lucene) objects instead.

Inheritance: IndexReader
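Before the examples, here is a minimal orientation sketch of walking an index leaf by leaf through this interface, reading stored fields, terms, and postings. It is a sketch only, assuming Lucene.NET 4.8-style member names (which differ slightly between the versions the examples below were taken from); the index path and the "body" field name are placeholders.

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;

using (Directory dir = FSDirectory.Open("path/to/index"))           // placeholder path
using (DirectoryReader topReader = DirectoryReader.Open(dir))
{
    foreach (AtomicReaderContext ctx in topReader.Leaves)
    {
        AtomicReader leaf = ctx.AtomicReader;                        // atomic: no sub-readers

        // Stored fields are addressed by ephemeral, segment-local document numbers.
        if (leaf.MaxDoc > 0)
        {
            var firstDoc = leaf.Document(0);
        }

        // Terms and postings for a (placeholder) field.
        Terms terms = leaf.Fields?.GetTerms("body");
        if (terms != null)
        {
            TermsEnum te = terms.GetEnumerator();
            while (te.MoveNext())
            {
                DocsEnum docs = te.Docs(leaf.LiveDocs, null);        // postings, skipping deleted docs
                while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    // ctx.DocBase + the segment-local doc number gives the top-level doc number
                }
            }
        }
    }
}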
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().Next().ToString(), ft));
                doc.Add(new Field(_textFieldName, new StringBuilder(Random().Next().ToString()).Append(Random().Next().ToString()).Append(
                    Random().Next().ToString()).ToString(), ft));
                doc.Add(new Field(_classFieldName, Random().Next().ToString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
        public override void SetUp()
        {
            base.SetUp();
            Document doc;
            Rd1 = NewDirectory();
            IndexWriter iw1 = new IndexWriter(Rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the quick brown fox jumps", Field.Store.YES));
            doc.Add(NewTextField("field2", "the quick brown fox jumps", Field.Store.YES));
            iw1.AddDocument(doc);

            iw1.Dispose();
            Rd2 = NewDirectory();
            IndexWriter iw2 = new IndexWriter(Rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            doc = new Document();
            doc.Add(NewTextField("field1", "the fox jumps over the lazy dog", Field.Store.YES));
            doc.Add(NewTextField("field3", "the fox jumps over the lazy dog", Field.Store.YES));
            iw2.AddDocument(doc);

            iw2.Dispose();

            this.Ir1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd1));
            this.Ir2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd2));
        }
 /// <summary>
 /// called only from static open() methods </summary>
 internal StandardDirectoryReader(Directory directory, AtomicReader[] readers, IndexWriter writer, SegmentInfos sis, int termInfosIndexDivisor, bool applyAllDeletes)
     : base(directory, readers)
 {
     this.Writer = writer;
     this.SegmentInfos = sis;
     this.TermInfosIndexDivisor = termInfosIndexDivisor;
     this.ApplyAllDeletes = applyAllDeletes;
 }
 /// <summary>
 /// Creates a new <seealso cref="AtomicReaderContext"/>
 /// </summary>
 internal AtomicReaderContext(CompositeReaderContext parent, AtomicReader reader, int ord, int docBase, int leafOrd, int leafDocBase)
     : base(parent, ord, docBase)
 {
     this.Ord = leafOrd;
     this.DocBase = leafDocBase;
     this.reader = reader;
     this.leaves = IsTopLevel ? new[] { this } : null; //LUCENE TO-DO suspicious
 }
 /// <summary>
 /// Get the wrapped instance by <code>reader</code> as long as this reader is
 ///  an instance of <seealso cref="FilterAtomicReader"/>.
 /// </summary>
 public static AtomicReader Unwrap(AtomicReader reader)
 {
     while (reader is FilterAtomicReader)
     {
         reader = ((FilterAtomicReader)reader).@in;
     }
     return reader;
 }
 public AssertingAtomicReader(AtomicReader @in)
     : base(@in)
 {
     // check some basic reader sanity
     Debug.Assert(@in.MaxDoc >= 0);
     Debug.Assert(@in.NumDocs <= @in.MaxDoc);
     Debug.Assert(@in.NumDeletedDocs + @in.NumDocs == @in.MaxDoc);
      Debug.Assert(!@in.HasDeletions || @in.NumDeletedDocs > 0 && @in.NumDocs < @in.MaxDoc);
 }
 public AssertingAtomicReader(AtomicReader @in)
     : base(@in)
 {
     // check some basic reader sanity
     Debug.Assert(@in.MaxDoc() >= 0);
     Debug.Assert(@in.NumDocs() <= @in.MaxDoc());
     Debug.Assert(@in.NumDeletedDocs() + @in.NumDocs() == @in.MaxDoc());
      Debug.Assert(!@in.HasDeletions() || @in.NumDeletedDocs() > 0 && @in.NumDocs() < @in.MaxDoc());
 }
            public CachedDistanceFunctionValue(AtomicReader reader, ShapeFieldCacheDistanceValueSource enclosingInstance)
            {
                cache = enclosingInstance.provider.GetCache(reader);
                this.enclosingInstance = enclosingInstance;

                from = enclosingInstance.from;
                calculator = enclosingInstance.ctx.GetDistCalc();
                nullValue = (enclosingInstance.ctx.IsGeo() ? 180 : double.MaxValue);
            }
        public override void Warm(AtomicReader reader)
        {
            long startTime = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; // elapsed-time baseline in ms (DateTime.Now.Millisecond only yields the 0-999 component)
            int indexedCount = 0;
            int docValuesCount = 0;
            int normsCount = 0;
            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.Indexed)
                {
                    reader.Terms(info.Name);
                    indexedCount++;

                    if (info.HasNorms())
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues())
                {
                    switch (info.DocValuesType)
                    {
                        case DocValuesType_e.NUMERIC:
                            reader.GetNumericDocValues(info.Name);
                            break;

                        case DocValuesType_e.BINARY:
                            reader.GetBinaryDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED:
                            reader.GetSortedDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED_SET:
                            reader.GetSortedSetDocValues(info.Name);
                            break;

                        default:
                            Debug.Assert(false); // unknown dv type
                            break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (InfoStream.IsEnabled("SMSW"))
            {
                InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (DateTime.Now.Millisecond - startTime));
            }
        }
Example #10
 /// <summary>
 /// Creates a <seealso cref="DocMap"/> instance appropriate for
 ///  this reader.
 /// </summary>
 public static DocMap Build(AtomicReader reader)
 {
     int maxDoc = reader.MaxDoc();
     if (!reader.HasDeletions())
     {
         return new NoDelDocMap(maxDoc);
     }
     Bits liveDocs = reader.LiveDocs;
     return Build(maxDoc, liveDocs);
 }
        /// <summary>
        /// Creates this, pulling doc values from the specified
        /// field. 
        /// </summary>
        public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
        {
            this.field = field;
            this.origReader = reader;

            // We need this to create thread-safe MultiSortedSetDV
            // per collector:
            topReader = SlowCompositeReaderWrapper.Wrap(reader);
            SortedSetDocValues dv = topReader.GetSortedSetDocValues(field);
            if (dv == null)
            {
                throw new System.ArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
            }
            if (dv.ValueCount > int.MaxValue)
            {
                throw new System.ArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.ValueCount);
            }
            valueCount = (int)dv.ValueCount;

            // TODO: we can make this more efficient if eg we can be
            // "involved" when IOrdinalMap is being created?  Ie see
            // each term/ord it's assigning as it goes...
            string lastDim = null;
            int startOrd = -1;

            // TODO: this approach can work for full hierarchy?;
            // TaxoReader can't do this since ords are not in
            // "sorted order" ... but we should generalize this to
            // support arbitrary hierarchy:
            for (int ord = 0; ord < valueCount; ord++)
            {
                BytesRef term = new BytesRef();
                dv.LookupOrd(ord, term);
                string[] components = FacetsConfig.StringToPath(term.Utf8ToString());
                if (components.Length != 2)
                {
                    throw new System.ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + term.Utf8ToString());
                }
                if (!components[0].Equals(lastDim))
                {
                    if (lastDim != null)
                    {
                        prefixToOrdRange[lastDim] = new OrdRange(startOrd, ord - 1);
                    }
                    startOrd = ord;
                    lastDim = components[0];
                }
            }

            if (lastDim != null)
            {
                prefixToOrdRange[lastDim] = new OrdRange(startOrd, valueCount - 1);
            }
        }
            public DistanceFunctionValue(DistanceValueSource outerInstance, AtomicReader reader)
            {
                this.outerInstance = outerInstance;

                ptX = FieldCache.DEFAULT.GetDoubles(reader, outerInstance.strategy.FieldNameX, true);
                ptY = FieldCache.DEFAULT.GetDoubles(reader, outerInstance.strategy.FieldNameY, true);
                validX = FieldCache.DEFAULT.GetDocsWithField(reader, outerInstance.strategy.FieldNameX);
                validY = FieldCache.DEFAULT.GetDocsWithField(reader, outerInstance.strategy.FieldNameY);

                from = outerInstance.from;
                calculator = outerInstance.strategy.SpatialContext.DistCalc;
                nullValue = (outerInstance.strategy.SpatialContext.IsGeo ? 180 * outerInstance.multiplier : double.MaxValue);
            }
 public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
 {
     Terms terms = reader.Terms(FieldName);
     if (terms != null)
     {
         TermsEnum te = terms.Iterator(null);
         if (te.SeekExact(bytes))
         {
             return te.DocsAndPositions(liveDocs, null);
         }
     }
     return null;
 }
            public DistanceFunctionValue(DistanceValueSource enclosingInstance, AtomicReader reader)
            {
                this.enclosingInstance = enclosingInstance;

                ptX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameX, true);
                ptY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameY, true);
                validX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameX);
                validY = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameY);

                from = enclosingInstance.from;
                calculator = enclosingInstance.strategy.SpatialContext.GetDistCalc();
                nullValue = (enclosingInstance.strategy.SpatialContext.IsGeo() ? 180 : double.MaxValue);
            }
            public BBoxSimilarityValueSourceFunctionValue(AtomicReader reader,
                                                          BBoxSimilarityValueSource enclosingInstance)
            {
                _enclosingInstance = enclosingInstance;
                rect = _enclosingInstance.strategy.SpatialContext.MakeRectangle(0, 0, 0, 0); //reused

                minX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX, true);
                minY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY, true);
                maxX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX, true);
                maxY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY, true);

                validMinX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
                validMaxX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
            }
        public override void SetUp()
        {
            base.SetUp();
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), Similarity, TimeZone);

            AddDoc(writer, @"admin guest", @"010", @"20040101", @"Y");
            AddDoc(writer, @"guest", @"020", @"20040101", @"Y");
            AddDoc(writer, @"guest", @"020", @"20050101", @"Y");
            AddDoc(writer, @"admin", @"020", @"20050101", @"Maybe");
            AddDoc(writer, @"admin guest", @"030", @"20050101", @"N");
            reader = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
        }
Example #17
        private FixedBitSet CorrectBits(AtomicReader reader, Bits acceptDocs)
        {
            FixedBitSet bits = new FixedBitSet(reader.MaxDoc); //assume all are INvalid
            Terms terms = reader.Fields.Terms(fieldName);

            if (terms == null)
            {
                return bits;
            }

            TermsEnum termsEnum = terms.Iterator(null);
            DocsEnum docs = null;
            while (true)
            {
                BytesRef currTerm = termsEnum.Next();
                if (currTerm == null)
                {
                    break;
                }
                else
                {
                    docs = termsEnum.Docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
                    int doc = docs.NextDoc();
                    if (doc != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(doc);
                        }
                        else
                        {
                            int lastDoc = doc;
                            while (true)
                            {
                                lastDoc = doc;
                                doc = docs.NextDoc();
                                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                            }
                            bits.Set(lastDoc);
                        }
                    }
                }
            }
            return bits;
        }
 public static void CheckNorms(AtomicReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.Fields.Length; i++)
     {
         IndexableField f = DocHelper.Fields[i];
         if (f.FieldType().Indexed)
         {
             Assert.AreEqual(reader.GetNormValues(f.Name()) != null, !f.FieldType().OmitNorms);
             Assert.AreEqual(reader.GetNormValues(f.Name()) != null, !DocHelper.NoNorms.ContainsKey(f.Name()));
             if (reader.GetNormValues(f.Name()) == null)
             {
                 // test for norms of null
                 NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name());
                 Assert.IsNull(norms);
             }
         }
     }
 }
        public static void AssertSplit(AtomicReader originalIndex, double testRatio, double crossValidationRatio, params string[] fieldNames)
        {
            BaseDirectoryWrapper trainingIndex = NewDirectory();
            BaseDirectoryWrapper testIndex = NewDirectory();
            BaseDirectoryWrapper crossValidationIndex = NewDirectory();

            try
            {
                DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
                datasetSplitter.Split(originalIndex, trainingIndex, testIndex, crossValidationIndex, new MockAnalyzer(Random()), fieldNames);

                NotNull(trainingIndex);
                NotNull(testIndex);
                NotNull(crossValidationIndex);

                DirectoryReader trainingReader = DirectoryReader.Open(trainingIndex);
                True((int)(originalIndex.MaxDoc * (1d - testRatio - crossValidationRatio)) == trainingReader.MaxDoc);
                DirectoryReader testReader = DirectoryReader.Open(testIndex);
                True((int)(originalIndex.MaxDoc * testRatio) == testReader.MaxDoc);
                DirectoryReader cvReader = DirectoryReader.Open(crossValidationIndex);
                True((int)(originalIndex.MaxDoc * crossValidationRatio) == cvReader.MaxDoc);

                trainingReader.Dispose();
                testReader.Dispose();
                cvReader.Dispose();
                CloseQuietly(trainingReader);
                CloseQuietly(testReader);
                CloseQuietly(cvReader);
            }
            finally
            {
                trainingIndex.Dispose();
                testIndex.Dispose();
                crossValidationIndex.Dispose();
            }
        }
        protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
        {
            if (docIdSet == null)
            {
                return EMPTY_DOCIDSET;
            }

            if (docIdSet is FixedBitSet)
            {
                // this is different from CachingWrapperFilter: even when the DocIdSet is
                // cacheable, we convert it to a FixedBitSet since we require all the
                // cached filters to be FixedBitSets
                return docIdSet;
            }

            DocIdSetIterator it = docIdSet.GetIterator();
            if (it == null)
            {
                return EMPTY_DOCIDSET;
            }
            FixedBitSet copy = new FixedBitSet(reader.MaxDoc);
            copy.Or(it);
            return copy;
        }
Example #21
        /// <summary>
        /// Call this only once (if you subclass!) </summary>
        protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
        {
            FieldInfo info = reader.FieldInfos.FieldInfo(m_field);

            if (info != null && info.HasDocValues)
            {
                throw IllegalStateException.Create("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
            }
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
            {
                // No terms
                return;
            }
            Terms terms = fields.GetTerms(m_field);

            if (terms == null)
            {
                // No terms
                return;
            }

            TermsEnum te        = terms.GetEnumerator();
            BytesRef  seekStart = termPrefix ?? new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
            {
                // No terms match
                return;
            }

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            //
            // enumerate all terms, and build an intermediate form of the un-inverted field.
            //
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in its byte array (for faster
            // appending and faster creation of the final form).
            //
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            m_docsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
            {
                BytesRef t = te.Term;
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                {
                    break;
                }
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                {
                    try
                    {
                        m_ordBase = (int)te.Ord;
                        //System.out.println("got ordBase=" + ordBase);
                    }
                    catch (Exception uoe) when(uoe.IsUnsupportedOperationException())
                    {
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new JCG.List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    }
                    testedOrd = true;
                }

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
                {
                    // Index this term
                    m_sizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes
                    indexedTerms.Add(indexedTerm);
                }

                int df = te.DocFreq;
                if (df <= m_maxTermDocFreq)
                {
                    m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                    {
                        int doc = m_docsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        //System.out.println("  chunk=" + chunk + " docs");

                        actualDF++;
                        m_termInstances++;

                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                        {
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = val.TripleShift(8);
                            int ilen   = VInt32Size(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                            {
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            }
                            pos        = WriteInt32(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                        }
                        else
                        {
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                            {
                                ipos = 0;
                            }
                            else if ((val & 0x0000ff80) == 0)
                            {
                                ipos = 1;
                            }
                            else if ((val & 0x00ff8000) == 0)
                            {
                                ipos = 2;
                            }
                            else if ((val & 0xff800000) == 0)
                            {
                                ipos = 3;
                            }
                            else
                            {
                                ipos = 4;
                            }

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt32(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                            {
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                {
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                }
                                index[doc] = val;
                            }
                            else
                            {
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                {
                                    tempArr[j] = (sbyte)val;
                                    val        = val.TripleShift(8);
                                }
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                            }
                        }
                    }
                    SetActualDocFreq(termNum, actualDF);
                }

                termNum++;
                if (!te.MoveNext())
                {
                    break;
                }
            }

            m_numTermsInField = termNum;

            long midPoint = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            if (m_termInstances == 0)
            {
                // we didn't invert anything
                // lower memory consumption.
                m_tnums = null;
            }
            else
            {
                this.m_index = index;

                //
                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.
                //

                for (int pass = 0; pass < 256; pass++)
                {
                    var target = m_tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                    {
                        pos = target.Length;
                    }
                    else
                    {
                        target = new sbyte[4096];
                    }

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                    {
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                        {
                            //System.out.println("  pass="******" process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                            {
                                int len = val.TripleShift(8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                {
                                    // we only have 24 bits for the array index
                                    throw IllegalStateException.Create("Too many values for UnInvertedField faceting on field " + m_field);
                                }
                                var arr = bytes[doc];

                                /*
                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                 */
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                {
                                    int newlen = target.Length;

                                    //* we don't have to worry about the array getting too large
                                    // since the "pos" param will overflow first (only 24 bits available)
                                    // if ((newlen<<1) <= 0) {
                                    //  // overflow...
                                    //  newlen = Integer.MAX_VALUE;
                                    //  if (newlen <= pos + len) {
                                    //    throw new SolrException(400,"Too many terms to uninvert field!");
                                    //  }
                                    // } else {
                                    //  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    // }
                                    //
                                    while (newlen <= pos + len) // doubling strategy
                                    {
                                        newlen <<= 1;
                                    }
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                }
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator
                            }
                        }
                    }

                    // shrink array
                    if (pos < target.Length)
                    {
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;
                    }

                    m_tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
                    {
                        break;
                    }
                }
            }
            if (indexedTerms != null)
            {
                m_indexedTermsArray = new BytesRef[indexedTerms.Count];
                indexedTerms.CopyTo(m_indexedTermsArray, 0);
            }

            long endTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            m_total_time  = (int)(endTime - startTime);
            m_phase1_time = (int)(midPoint - startTime);
        }
 public ThreadAnonymousInnerClassHelper(TestDocValuesWithThreads outerInstance, IList <long?> numbers, IList <BytesRef> binary, IList <BytesRef> sorted, int numDocs, AtomicReader ar, CountdownEvent startingGun, Random threadRandom)
 {
     this.OuterInstance = outerInstance;
     this.Numbers       = numbers;
     this.Binary        = binary;
     this.Sorted        = sorted;
     this.NumDocs       = numDocs;
     this.Ar            = ar;
     this.StartingGun   = startingGun;
     this.ThreadRandom  = threadRandom;
 }
Example #23
        public virtual void TestNumericField()
        {
            using Directory dir = NewDirectory();
            DirectoryReader r = null;

            try
            {
                var numDocs = AtLeast(500);
                var answers = new Number[numDocs];
                using (var w = new RandomIndexWriter(Random, dir))
                {
                    NumericType[] typeAnswers = new NumericType[numDocs];
                    for (int id = 0; id < numDocs; id++)
                    {
                        Document    doc = new Document();
                        Field       nf;
                        Field       sf;
                        Number      answer;
                        NumericType typeAnswer;
                        if (Random.NextBoolean())
                        {
                            // float/double
                            if (Random.NextBoolean())
                            {
                                float f = Random.NextSingle();
                                answer     = Single.GetInstance(f);
                                nf         = new SingleField("nf", f, Field.Store.NO);
                                sf         = new StoredField("nf", f);
                                typeAnswer = NumericType.SINGLE;
                            }
                            else
                            {
                                double d = Random.NextDouble();
                                answer     = Double.GetInstance(d);
                                nf         = new DoubleField("nf", d, Field.Store.NO);
                                sf         = new StoredField("nf", d);
                                typeAnswer = NumericType.DOUBLE;
                            }
                        }
                        else
                        {
                            // int/long
                            if (Random.NextBoolean())
                            {
                                int i = Random.Next();
                                answer     = Int32.GetInstance(i);
                                nf         = new Int32Field("nf", i, Field.Store.NO);
                                sf         = new StoredField("nf", i);
                                typeAnswer = NumericType.INT32;
                            }
                            else
                            {
                                long l = Random.NextInt64();
                                answer     = Int64.GetInstance(l);
                                nf         = new Int64Field("nf", l, Field.Store.NO);
                                sf         = new StoredField("nf", l);
                                typeAnswer = NumericType.INT64;
                            }
                        }
                        doc.Add(nf);
                        doc.Add(sf);
                        answers[id]     = answer;
                        typeAnswers[id] = typeAnswer;
                        FieldType ft = new FieldType(Int32Field.TYPE_STORED);
                        ft.NumericPrecisionStep = int.MaxValue;
                        doc.Add(new Int32Field("id", id, ft));
                        w.AddDocument(doc);
                    }
                    r = w.GetReader();
                } // w.Dispose();

                Assert.AreEqual(numDocs, r.NumDocs);

                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    AtomicReader      sub = ctx.AtomicReader;
                    FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                    for (int docID = 0; docID < sub.NumDocs; docID++)
                    {
                        Document doc = sub.Document(docID);
                        Field    f   = doc.GetField <Field>("nf");
                        Assert.IsTrue(f is StoredField, "got f=" + f);
#pragma warning disable 612, 618
                        Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue());
#pragma warning restore 612, 618
                    }
                }
            }
            finally
            {
                r?.Dispose();
            }
        }
Example #24
 internal Iterator(DocTermOrds outerInstance, AtomicReader reader)
 {
     this.outerInstance = outerInstance;
     this.reader        = reader;
     this.te            = GetTermsEnum();
 }
 /// <summary>
 /// Construct a <see cref="FilterAtomicReader"/> based on the specified base reader.
 /// <para/>
 /// Note that base reader is closed if this <see cref="FilterAtomicReader"/> is closed.
 /// </summary>
 /// <param name="input"> specified base reader. </param>
 public FilterAtomicReader(AtomicReader input)
     : base()
 {
     this.m_input = input;
     input.RegisterParentReader(this);
 }
        private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        {
            DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));

            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

            /*
             * for(int docID=0;docID<subR.MaxDoc;docID++) {
             * System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
             * }
             */

            if (Verbose)
            {
                Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
                Console.WriteLine("TEST: all TERMS:");
                TermsEnum allTE = MultiFields.GetTerms(r, "field").GetIterator(null);
                int       ord   = 0;
                while (allTE.Next() != null)
                {
                    Console.WriteLine("  ord=" + (ord++) + " term=" + allTE.Term.Utf8ToString());
                }
            }

            //final TermsEnum te = subR.Fields.Terms("field").iterator();
            TermsEnum te = dto.GetOrdTermsEnum(r);

            if (dto.NumTerms == 0)
            {
                if (prefixRef == null)
                {
                    Assert.IsNull(MultiFields.GetTerms(r, "field"));
                }
                else
                {
                    Terms terms = MultiFields.GetTerms(r, "field");
                    if (terms != null)
                    {
                        TermsEnum            termsEnum = terms.GetIterator(null);
                        TermsEnum.SeekStatus result    = termsEnum.SeekCeil(prefixRef);
                        if (result != TermsEnum.SeekStatus.END)
                        {
                            Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                        }
                        else
                        {
                            // ok
                        }
                    }
                    else
                    {
                        // ok
                    }
                }
                return;
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: TERMS:");
                te.SeekExact(0);
                while (true)
                {
                    Console.WriteLine("  ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
                    if (te.Next() == null)
                    {
                        break;
                    }
                }
            }

            SortedSetDocValues iter = dto.GetIterator(r);

            for (int docID = 0; docID < r.MaxDoc; docID++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
                }
                iter.SetDocument(docID);
                int[] answers = idToOrds[docIDToID.Get(docID)];
                int   upto    = 0;
                long  ord;
                while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    te.SeekExact(ord);
                    BytesRef expected = termsArray[answers[upto++]];
                    if (Verbose)
                    {
                        Console.WriteLine("  exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
                    }
                    Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
                }
                Assert.AreEqual(answers.Length, upto);
            }
        }
Example #27
 /// <summary>
 /// Wrap one of the parent <see cref="DirectoryReader"/>'s subreaders </summary>
 /// <param name="reader"> the subreader to wrap </param>
 /// <returns> a wrapped/filtered <see cref="AtomicReader"/> </returns>
 public abstract AtomicReader Wrap(AtomicReader reader);
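A minimal implementation sketch follows; it assumes this is the FilterDirectoryReader.SubReaderWrapper member (as in the corresponding Java class), and the wrapper class name is hypothetical.

 internal sealed class PassThroughSubReaderWrapper : FilterDirectoryReader.SubReaderWrapper // hypothetical name
 {
     // Called once for each leaf of the parent DirectoryReader; whatever is
     // returned here is the leaf that users of the filtered reader will see.
     public override AtomicReader Wrap(AtomicReader reader)
     {
         // No filtering in this sketch; a real wrapper would typically return
         // a FilterAtomicReader subclass that overrides selected members.
         return reader;
     }
 }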
Example #28
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-7
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs == null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }
Example #29
        [Test, LongRunningTest, HasTimeout] // LUCENENET TODO: Can this test be optimized to run faster on .NET Core 1.0?
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int numDocs                  = AtLeast(20000);
            int numBinaryFields          = AtLeast(5);
            int numTerms                 = TestUtil.NextInt(random, 10, 100); // terms should affect many docs
            HashSet <string> updateTerms = new HashSet <string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Example #30
        public virtual void TestStressMultiThreading()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter       writer = new IndexWriter(dir, conf);

            // create index
            int numThreads = TestUtil.NextInt(Random(), 3, 6);
            int numDocs    = AtLeast(2000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                double group = Random().NextDouble();
                string g;
                if (group < 0.1)
                {
                    g = "g0";
                }
                else if (group < 0.5)
                {
                    g = "g1";
                }
                else if (group < 0.8)
                {
                    g = "g2";
                }
                else
                {
                    g = "g3";
                }
                doc.Add(new StringField("updKey", g, Store.NO));
                for (int j = 0; j < numThreads; j++)
                {
                    long value = Random().Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
                    doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
                }
                writer.AddDocument(doc);
            }

            CountdownEvent done       = new CountdownEvent(numThreads);
            AtomicInt32    numUpdates = new AtomicInt32(AtLeast(100));

            // same thread updates a field as well as reopens
            ThreadClass[] threads = new ThreadClass[numThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                string f  = "f" + i;
                string cf = "cf" + i;
                threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
            }

            foreach (ThreadClass t in threads)
            {
                t.Start();
            }
            done.Wait();
            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                for (int i = 0; i < numThreads; i++)
                {
                    BinaryDocValues  bdv             = r.GetBinaryDocValues("f" + i);
                    NumericDocValues control         = r.GetNumericDocValues("cf" + i);
                    IBits            docsWithBdv     = r.GetDocsWithField("f" + i);
                    IBits            docsWithControl = r.GetDocsWithField("cf" + i);
                    IBits            liveDocs        = r.LiveDocs;
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        if (liveDocs == null || liveDocs.Get(j))
                        {
                            Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                            if (docsWithBdv.Get(j))
                            {
                                long ctrlValue = control.Get(j);
                                long bdvValue  = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                                //              if (ctrlValue != bdvValue) {
                                //                System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                                //              }
                                Assert.AreEqual(ctrlValue, bdvValue);
                            }
                        }
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Example #31
        public virtual void TestIntersectStartTerm()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(new LogDocMergePolicy());

            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "abd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "acd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bcd", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.Reader;

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.Terms("field");

            Automaton         automaton = (new RegExp(".*d", RegExp.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te;

            // should seek to startTerm
            te = terms.Intersect(ca, new BytesRef("aad"));
            Assert.AreEqual("abd", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("acd", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should fail to find ceil label on second arc, rewind
            te = terms.Intersect(ca, new BytesRef("add"));
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should reach end
            te = terms.Intersect(ca, new BytesRef("bcd"));
            Assert.IsNull(te.Next());
            te = terms.Intersect(ca, new BytesRef("ddd"));
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Example #32
        /// <summary>
        /// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided
        /// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is
        /// loaded, only <paramref name="storedFieldsReaders"/> will be used.
        /// </summary>
        public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
        {
            this.closeSubReaders = closeSubReaders;
            if (readers.Length == 0 && storedFieldsReaders.Length > 0)
            {
                throw new ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
            }
            this.parallelReaders     = (AtomicReader[])readers.Clone();
            this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();
            if (parallelReaders.Length > 0)
            {
                AtomicReader first = parallelReaders[0];
                this.maxDoc       = first.MaxDoc;
                this.numDocs      = first.NumDocs;
                this.hasDeletions = first.HasDeletions;
            }
            else
            {
                this.maxDoc       = this.numDocs = 0;
                this.hasDeletions = false;
            }
            completeReaderSet.UnionWith(this.parallelReaders);
            completeReaderSet.UnionWith(this.storedFieldsReaders);

            // check compatibility:
            foreach (AtomicReader reader in completeReaderSet)
            {
                if (reader.MaxDoc != maxDoc)
                {
                    throw new ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + reader.MaxDoc);
                }
            }

            // TODO: make this read-only in a cleaner way?
            FieldInfos.Builder builder = new FieldInfos.Builder();
            // build FieldInfos and fieldToReader map:
            foreach (AtomicReader reader in this.parallelReaders)
            {
                FieldInfos readerFieldInfos = reader.FieldInfos;
                foreach (FieldInfo fieldInfo in readerFieldInfos)
                {
                    // NOTE: first reader having a given field "wins":
                    if (!fieldToReader.ContainsKey(fieldInfo.Name))
                    {
                        builder.Add(fieldInfo);
                        fieldToReader[fieldInfo.Name] = reader;
                        if (fieldInfo.HasVectors)
                        {
                            tvFieldToReader[fieldInfo.Name] = reader;
                        }
                    }
                }
            }
            fieldInfos = builder.Finish();

            // build Fields instance
            foreach (AtomicReader reader in this.parallelReaders)
            {
                Fields readerFields = reader.Fields;
                if (readerFields != null)
                {
                    foreach (string field in readerFields)
                    {
                        // only add if the reader responsible for that field name is the current:
                        if (fieldToReader[field].Equals(reader))
                        {
                            this.fields.AddField(field, readerFields.GetTerms(field));
                        }
                    }
                }
            }

            // do this last so any exceptions thrown earlier don't affect refcounts:
            foreach (AtomicReader reader in completeReaderSet)
            {
                if (!closeSubReaders)
                {
                    reader.IncRef();
                }
                reader.RegisterParentReader(this);
            }
        }
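A minimal usage sketch for the constructor above; the directory variables are assumed, and both readers must cover the same document numbers (same MaxDoc), since the constructor rejects mismatches.

            AtomicReader postings = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(indexDir));        // terms/postings
            AtomicReader stored   = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(storedFieldsDir)); // stored fields only

            // Queries see the fields of both parallel readers, but Document(n) is served only by "stored".
            ParallelAtomicReader parallel = new ParallelAtomicReader(
                true,                                     // closeSubReaders: dispose sub-readers with the parallel reader
                new AtomicReader[] { postings, stored },
                new AtomicReader[] { stored });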
Example #33
 /// <summary>
 /// Inverts only terms starting w/ prefix, and only terms
 /// whose docFreq (not taking deletions into account) is
 /// &lt;= <paramref name="maxTermDocFreq"/>
 /// </summary>
 public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq)
     : this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS)
 {
 }
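For example (a hedged sketch; "reader" and the "category" field are assumed to exist), uninverting only terms under a prefix while skipping very frequent terms might look like:

     // Only uninvert terms of "category" that start with "cat/" and occur in at most 1000 docs.
     DocTermOrds prefixOrds = new DocTermOrds(reader, reader.LiveDocs, "category",
                                              new BytesRef("cat/"), 1000);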
Example #34
 public ThreadAnonymousClass(TestDocValuesWithThreads outerInstance, IList <long?> numbers, IList <BytesRef> binary, IList <BytesRef> sorted, int numDocs, AtomicReader ar, CountdownEvent startingGun, Random threadRandom)
 {
     this.outerInstance = outerInstance;
     this.numbers       = numbers;
     this.binary        = binary;
     this.sorted        = sorted;
     this.numDocs       = numDocs;
     this.ar            = ar;
     this.startingGun   = startingGun;
     this.threadRandom  = threadRandom;
 }
Example #35
 public override AtomicReader Wrap(AtomicReader reader)
 {
     return(reader);
 }
        public virtual void Test2()
        {
            Random            random   = Random;
            int               NUM_DOCS = AtLeast(100);
            Directory         dir      = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                random, dir);
            bool             allowDups = random.NextBoolean();
            HashSet <string> seen      = new HashSet <string>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
            }
            int numDocs = 0;
            IList <BytesRef> docValues = new List <BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                string s;
                if (random.NextBoolean())
                {
                    s = TestUtil.RandomSimpleString(random);
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random);
                }
                BytesRef br = new BytesRef(s);

                if (!allowDups)
                {
                    if (seen.Contains(s))
                    {
                        continue;
                    }
                    seen.Add(s);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  " + numDocs + ": s=" + s);
                }

                Document doc = new Document();
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
                docValues.Add(br);
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.GetReader().Dispose();
                }
            }

            writer.ForceMerge(1);
            DirectoryReader r = writer.GetReader();

            writer.Dispose();

            AtomicReader sr = GetOnlySegmentReader(r);

            long END_TIME = Environment.TickCount + (TEST_NIGHTLY ? 30 : 1);

            int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10);

            ThreadClass[] threads = new ThreadClass[NUM_THREADS];
            for (int thread = 0; thread < NUM_THREADS; thread++)
            {
                threads[thread] = new ThreadAnonymousInnerClassHelper2(random, docValues, sr, END_TIME);
                threads[thread].Start();
            }

            foreach (ThreadClass thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random);
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.GetReader();

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
 /// <summary>
 /// Test term vectors.
 /// @lucene.experimental
 /// </summary>
 public static Status.TermVectorStatus TestTermVectors(AtomicReader reader, TextWriter infoStream)
 {
     return TestTermVectors(reader, infoStream, false, false);
 }
        public virtual void TestRandomWithPrefix()
        {
            Directory dir = NewDirectory();

            ISet <string> prefixes  = new JCG.HashSet <string>();
            int           numPrefix = TestUtil.NextInt32(Random, 2, 7);

            if (Verbose)
            {
                Console.WriteLine("TEST: use " + numPrefix + " prefixes");
            }
            while (prefixes.Count < numPrefix)
            {
                prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random));
                //prefixes.Add(TestUtil.RandomSimpleString(random));
            }
            string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/);

            int             NUM_TERMS = AtLeast(20);
            ISet <BytesRef> terms     = new JCG.HashSet <BytesRef>();

            while (terms.Count < NUM_TERMS)
            {
                string s = prefixesArray[Random.Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random);
                //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random);
                if (s.Length > 0)
                {
                    terms.Add(new BytesRef(s));
                }
            }
            BytesRef[] termsArray = terms.ToArray();
            Array.Sort(termsArray);

            int NUM_DOCS = AtLeast(100);

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            // Sometimes swap in codec that impls ord():
            if (Random.Next(10) == 7)
            {
                Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
                conf.SetCodec(codec);
            }

            RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

            int[][]     idToOrds      = new int[NUM_DOCS][];
            ISet <int?> ordsForDocSet = new JCG.HashSet <int?>();

            for (int id = 0; id < NUM_DOCS; id++)
            {
                Document doc = new Document();

                doc.Add(new Int32Field("id", id, Field.Store.NO));

                int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier);
                while (ordsForDocSet.Count < termCount)
                {
                    ordsForDocSet.Add(Random.Next(termsArray.Length));
                }
                int[] ordsForDoc = new int[termCount];
                int   upto       = 0;
                if (Verbose)
                {
                    Console.WriteLine("TEST: doc id=" + id);
                }
                foreach (int ord in ordsForDocSet)
                {
                    ordsForDoc[upto++] = ord;
                    Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
                    if (Verbose)
                    {
                        Console.WriteLine("  f=" + termsArray[ord].Utf8ToString());
                    }
                    doc.Add(field);
                }
                ordsForDocSet.Clear();
                Array.Sort(ordsForDoc);
                idToOrds[id] = ordsForDoc;
                w.AddDocument(doc);
            }

            DirectoryReader r = w.GetReader();

            w.Dispose();

            if (Verbose)
            {
                Console.WriteLine("TEST: reader=" + r);
            }

            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);

            foreach (string prefix in prefixesArray)
            {
                BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

                int[][] idToOrdsPrefix = new int[NUM_DOCS][];
                for (int id = 0; id < NUM_DOCS; id++)
                {
                    int[]        docOrds = idToOrds[id];
                    IList <int?> newOrds = new List <int?>();
                    foreach (int ord in idToOrds[id])
                    {
                        if (StringHelper.StartsWith(termsArray[ord], prefixRef))
                        {
                            newOrds.Add(ord);
                        }
                    }
                    int[] newOrdsArray = new int[newOrds.Count];
                    int   upto         = 0;
                    foreach (int ord in newOrds)
                    {
                        newOrdsArray[upto++] = ord;
                    }
                    idToOrdsPrefix[id] = newOrdsArray;
                }

                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: sub=" + ctx.Reader);
                    }
                    Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef);
                }

                // Also test top-level reader: its enum does not support
                // ord, so this forces the OrdWrapper to run:
                if (Verbose)
                {
                    Console.WriteLine("TEST: top reader");
                }
                Verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
            }

            FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

            r.Dispose();
            dir.Dispose();
        }
 public override AtomicReader Wrap(AtomicReader reader)
 {
     return reader;
 }
Example #41
 internal Iterator(DocTermOrds outerInstance, AtomicReader reader)
 {
     this.OuterInstance = outerInstance;
     this.Reader        = reader;
     this.Te            = TermsEnum();
 }
 internal virtual AtomicReader[] Wrap(IList<AtomicReader> readers)
 {
     AtomicReader[] wrapped = new AtomicReader[readers.Count];
     for (int i = 0; i < readers.Count; i++)
     {
         wrapped[i] = Wrap(readers[i]);
     }
     return wrapped;
 }
Example #43
        public virtual void Test2()
        {
            Random            random   = Random;
            int               NUM_DOCS = AtLeast(100);
            Directory         dir      = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                random, dir);
            bool          allowDups = random.NextBoolean();
            ISet <string> seen      = new JCG.HashSet <string>();

            if (Verbose)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
            }
            int numDocs = 0;
            IList <BytesRef> docValues = new List <BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                string s;
                if (random.NextBoolean())
                {
                    s = TestUtil.RandomSimpleString(random);
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random);
                }
                BytesRef br = new BytesRef(s);

                if (!allowDups)
                {
                    if (seen.Contains(s))
                    {
                        continue;
                    }
                    seen.Add(s);
                }

                if (Verbose)
                {
                    Console.WriteLine("  " + numDocs + ": s=" + s);
                }

                Document doc = new Document();
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
                docValues.Add(br);
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.GetReader().Dispose();
                }
            }

            writer.ForceMerge(1);
            DirectoryReader r = writer.GetReader();

            writer.Dispose();

            AtomicReader sr = GetOnlySegmentReader(r);

            long END_TIME = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (TestNightly ? 30 : 1); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10);

            ThreadJob[] threads = new ThreadJob[NUM_THREADS];
            for (int thread = 0; thread < NUM_THREADS; thread++)
            {
                threads[thread] = new ThreadAnonymousClass2(random, docValues, sr, END_TIME);
                threads[thread].Start();
            }

            foreach (ThreadJob thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
 public ThreadAnonymousInnerClassHelper2(Random random, IList <BytesRef> docValues, AtomicReader sr, long END_TIME)
 {
     this.Random    = random;
     this.DocValues = docValues;
     this.Sr        = sr;
     this.END_TIME  = END_TIME;
 }
        /// <summary>
        /// Test stored fields.
        /// @lucene.experimental
        /// </summary>
        public static Status.StoredFieldStatus TestStoredFields(AtomicReader reader, TextWriter infoStream)
        {
            Status.StoredFieldStatus status = new Status.StoredFieldStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: stored fields.......");
                }

                // Scan stored fields for all documents
                Bits liveDocs = reader.LiveDocs;
                for (int j = 0; j < reader.MaxDoc; ++j)
                {
                    // Intentionally pull even deleted documents to
                    // make sure they too are not corrupt:
                    Document doc = reader.Document(j);
                    if (liveDocs == null || liveDocs.Get(j))
                    {
                        status.DocCount++;
                        status.TotFields += doc.Fields.Count;
                    }
                }

                // Validate docCount
                if (status.DocCount != reader.NumDocs)
                {
                    throw new Exception("docCount=" + status.DocCount + " but saw " + status.DocCount + " undeleted docs");
                }

                Msg(infoStream, "OK [" + status.TotFields + " total field count; avg " + ((((float)status.TotFields) / status.DocCount)).ToString(CultureInfo.InvariantCulture.NumberFormat) + " fields per doc]");
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
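A hedged usage sketch for the check above ("segmentReader" is assumed to be an already-open AtomicReader):

            Status.StoredFieldStatus storedStatus = TestStoredFields(segmentReader, Console.Out);
            if (storedStatus.Error != null)
            {
                // surface the stored-fields corruption recorded in the status object
                throw new InvalidOperationException("stored fields check failed", storedStatus.Error);
            }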
Example #46
 /// <summary>
 /// Inverts all terms </summary>
 public DocTermOrds(AtomicReader reader, IBits liveDocs, string field)
     : this(reader, liveDocs, field, null, int.MaxValue)
 {
 }
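As a rough sketch of how the uninverted ords are consumed (the GetIterator name is assumed from the Java DocTermOrds.iterator(reader) API; "reader" is an open AtomicReader):

     DocTermOrds allOrds = new DocTermOrds(reader, reader.LiveDocs, "field");
     SortedSetDocValues perDocOrds = allOrds.GetIterator(reader); // assumed port of iterator(reader)
     perDocOrds.SetDocument(0);
     long ord;
     while ((ord = perDocOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
     {
         // each ord identifies one distinct term of "field" that occurs in document 0
     }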
        /// <summary>
        /// Test term vectors.
        /// @lucene.experimental
        /// </summary>
        public static Status.TermVectorStatus TestTermVectors(AtomicReader reader, TextWriter infoStream, bool verbose, bool crossCheckTermVectors)
        {
            Status.TermVectorStatus status = new Status.TermVectorStatus();
            FieldInfos fieldInfos = reader.FieldInfos;
            Bits onlyDocIsDeleted = new FixedBitSet(1);

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: term vectors........");
                }

                DocsEnum docs = null;
                DocsAndPositionsEnum postings = null;

                // Only used if crossCheckTermVectors is true:
                DocsEnum postingsDocs = null;
                DocsAndPositionsEnum postingsPostings = null;

                Bits liveDocs = reader.LiveDocs;

                Fields postingsFields;
                // TODO: testTermsIndex
                if (crossCheckTermVectors)
                {
                    postingsFields = reader.Fields;
                }
                else
                {
                    postingsFields = null;
                }

                TermsEnum termsEnum = null;
                TermsEnum postingsTermsEnum = null;

                for (int j = 0; j < reader.MaxDoc; ++j)
                {
                    // Intentionally pull/visit (but don't count in
                    // stats) deleted documents to make sure they too
                    // are not corrupt:
                    Fields tfv = reader.GetTermVectors(j);

                    // TODO: can we make a IS(FIR) that searches just
                    // this term vector... to pass for searcher?

                    if (tfv != null)
                    {
                        // First run with no deletions:
                        CheckFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose);

                        // Again, with the one doc deleted:
                        CheckFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);

                        // Only agg stats if the doc is live:
                        bool doStats = liveDocs == null || liveDocs.Get(j);
                        if (doStats)
                        {
                            status.DocCount++;
                        }

                        foreach (string field in tfv)
                        {
                            if (doStats)
                            {
                                status.TotVectors++;
                            }

                            // Make sure FieldInfo thinks this field is vector'd:
                            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                            if (!fieldInfo.HasVectors())
                            {
                                throw new Exception("docID=" + j + " has term vectors for field=" + field + " but FieldInfo has storeTermVector=false");
                            }

                            if (crossCheckTermVectors)
                            {
                                Terms terms = tfv.Terms(field);
                                termsEnum = terms.Iterator(termsEnum);
                                bool postingsHasFreq = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
                                bool postingsHasPayload = fieldInfo.HasPayloads();
                                bool vectorsHasPayload = terms.HasPayloads();

                                Terms postingsTerms = postingsFields.Terms(field);
                                if (postingsTerms == null)
                                {
                                    throw new Exception("vector field=" + field + " does not exist in postings; doc=" + j);
                                }
                                postingsTermsEnum = postingsTerms.Iterator(postingsTermsEnum);

                                bool hasProx = terms.HasOffsets() || terms.HasPositions();
                                BytesRef term = null;
                                while ((term = termsEnum.Next()) != null)
                                {
                                    if (hasProx)
                                    {
                                        postings = termsEnum.DocsAndPositions(null, postings);
                                        Debug.Assert(postings != null);
                                        docs = null;
                                    }
                                    else
                                    {
                                        docs = termsEnum.Docs(null, docs);
                                        Debug.Assert(docs != null);
                                        postings = null;
                                    }

                                    DocsEnum docs2;
                                    if (hasProx)
                                    {
                                        Debug.Assert(postings != null);
                                        docs2 = postings;
                                    }
                                    else
                                    {
                                        Debug.Assert(docs != null);
                                        docs2 = docs;
                                    }

                                    DocsEnum postingsDocs2;
                                    if (!postingsTermsEnum.SeekExact(term))
                                    {
                                        throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                                    }
                                    postingsPostings = postingsTermsEnum.DocsAndPositions(null, postingsPostings);
                                    if (postingsPostings == null)
                                    {
                                        // Term vectors were indexed w/ pos but postings were not
                                        postingsDocs = postingsTermsEnum.Docs(null, postingsDocs);
                                        if (postingsDocs == null)
                                        {
                                            throw new Exception("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                                        }
                                    }

                                    if (postingsPostings != null)
                                    {
                                        postingsDocs2 = postingsPostings;
                                    }
                                    else
                                    {
                                        postingsDocs2 = postingsDocs;
                                    }

                                    int advanceDoc = postingsDocs2.Advance(j);
                                    if (advanceDoc != j)
                                    {
                                        throw new Exception("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
                                    }

                                    int doc = docs2.NextDoc();

                                    if (doc != 0)
                                    {
                                        throw new Exception("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
                                    }

                                    if (postingsHasFreq)
                                    {
                                        int tf = docs2.Freq();
                                        if (postingsHasFreq && postingsDocs2.Freq() != tf)
                                        {
                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.Freq());
                                        }

                                        if (hasProx)
                                        {
                                            for (int i = 0; i < tf; i++)
                                            {
                                                int pos = postings.NextPosition();
                                                if (postingsPostings != null)
                                                {
                                                    int postingsPos = postingsPostings.NextPosition();
                                                    if (terms.HasPositions() && pos != postingsPos)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
                                                    }
                                                }

                                                // Call the methods to at least make
                                                // sure they don't throw exc:
                                                int startOffset = postings.StartOffset();
                                                int endOffset = postings.EndOffset();
                                                // TODO: these are too anal...?
                                                /*
                                                  if (endOffset < startOffset) {
                                                  throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
                                                  }
                                                  if (startOffset < lastStartOffset) {
                                                  throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
                                                  }
                                                  lastStartOffset = startOffset;
                                                */

                                                if (postingsPostings != null)
                                                {
                                                    int postingsStartOffset = postingsPostings.StartOffset();

                                                    int postingsEndOffset = postingsPostings.EndOffset();
                                                    if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
                                                    }
                                                    if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset)
                                                    {
                                                        throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
                                                    }
                                                }

                                                BytesRef payload = postings.Payload;

                                                if (payload != null)
                                                {
                                                    Debug.Assert(vectorsHasPayload);
                                                }

                                                if (postingsHasPayload && vectorsHasPayload)
                                                {
                                                    Debug.Assert(postingsPostings != null);

                                                    if (payload == null)
                                                    {
                                                        // we have payloads, but not at this position.
                                                        // postings has payloads too, it should not have one at this position
                                                        if (postingsPostings.Payload != null)
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.Payload);
                                                        }
                                                    }
                                                    else
                                                    {
                                                        // we have payloads, and one at this position
                                                        // postings should also have one at this position, with the same bytes.
                                                        if (postingsPostings.Payload == null)
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
                                                        }
                                                        BytesRef postingsPayload = postingsPostings.Payload;
                                                        if (!payload.Equals(postingsPayload))
                                                        {
                                                            throw new Exception("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                float vectorAvg = status.DocCount == 0 ? 0 : status.TotVectors / (float)status.DocCount;
                Msg(infoStream, "OK [" + status.TotVectors + " total vector count; avg " + vectorAvg.ToString(CultureInfo.InvariantCulture.NumberFormat) + " term/freq vector fields per doc]");
            }
            catch (Exception e)
            {
                Msg(infoStream, "ERROR [" + Convert.ToString(e.Message) + "]");
                status.Error = e;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(e.ToString());
                    //infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
Example #48
        public virtual void TestIgnoreStoredFields()
        {
            Directory       dir1 = GetDir1(Random);
            Directory       dir2 = GetDir2(Random);
            CompositeReader ir1  = DirectoryReader.Open(dir1);
            CompositeReader ir2  = DirectoryReader.Open(dir2);

            // with overlapping
            ParallelCompositeReader pr = new ParallelCompositeReader(false, new CompositeReader[] { ir1, ir2 }, new CompositeReader[] { ir1 });

            Assert.AreEqual("v1", pr.Document(0).Get("f1"));
            Assert.AreEqual("v1", pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            AtomicReader slow = SlowCompositeReaderWrapper.Wrap(pr);

            Assert.IsNotNull(slow.GetTerms("f1"));
            Assert.IsNotNull(slow.GetTerms("f2"));
            Assert.IsNotNull(slow.GetTerms("f3"));
            Assert.IsNotNull(slow.GetTerms("f4"));
            pr.Dispose();

            // no stored fields at all
            pr = new ParallelCompositeReader(false, new CompositeReader[] { ir2 }, new CompositeReader[0]);
            Assert.IsNull(pr.Document(0).Get("f1"));
            Assert.IsNull(pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            slow = SlowCompositeReaderWrapper.Wrap(pr);
            Assert.IsNull(slow.GetTerms("f1"));
            Assert.IsNull(slow.GetTerms("f2"));
            Assert.IsNotNull(slow.GetTerms("f3"));
            Assert.IsNotNull(slow.GetTerms("f4"));
            pr.Dispose();

            // without overlapping
            pr = new ParallelCompositeReader(true, new CompositeReader[] { ir2 }, new CompositeReader[] { ir1 });
            Assert.AreEqual("v1", pr.Document(0).Get("f1"));
            Assert.AreEqual("v1", pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            slow = SlowCompositeReaderWrapper.Wrap(pr);
            Assert.IsNull(slow.GetTerms("f1"));
            Assert.IsNull(slow.GetTerms("f2"));
            Assert.IsNotNull(slow.GetTerms("f3"));
            Assert.IsNotNull(slow.GetTerms("f4"));
            pr.Dispose();

            // no main readers
            try
            {
                new ParallelCompositeReader(true, new CompositeReader[0], new CompositeReader[] { ir1 });
                Assert.Fail("didn't get expected exception: need a non-empty main-reader array");
            }
#pragma warning disable 168
            catch (System.ArgumentException iae)
#pragma warning restore 168
            {
                // pass
            }

            dir1.Dispose();
            dir2.Dispose();
        }
 /// <summary>
 /// Wrap one of the parent DirectoryReader's subreaders </summary>
 /// <param name="reader"> the subreader to wrap </param>
 /// <returns> a wrapped/filtered AtomicReader </returns>
 public abstract AtomicReader Wrap(AtomicReader reader);
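A trivial hedged sketch of an implementation; the FilterDirectoryReader.SubReaderWrapper base type is assumed here, and the identity wrapping matches the overrides shown elsewhere in these examples, whereas a real wrapper would normally return a FilterAtomicReader subclass.

 private sealed class NoOpSubReaderWrapper : FilterDirectoryReader.SubReaderWrapper // assumed base type
 {
     public override AtomicReader Wrap(AtomicReader reader)
     {
         // identity wrapping; substitute e.g. a FilterAtomicReader subclass to hide or rewrite data
         return reader;
     }
 }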
 /// <summary>
 /// Construct a FilterAtomicReader based on the specified base reader.
 /// <para/>
 /// Note that the base reader is closed if this FilterAtomicReader is closed. </summary>
 /// <param name="in"> specified base reader. </param>
 public FilterAtomicReader(AtomicReader @in)
     : base()
 {
     this.@in = @in;
     @in.RegisterParentReader(this);
 }
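For instance, a minimal pass-through subclass (a sketch; a real filter would override whichever members it wants to change):

 private sealed class PassThroughAtomicReader : FilterAtomicReader
 {
     public PassThroughAtomicReader(AtomicReader input)
         : base(input) // the base constructor stores "input" and registers this reader as its parent
     {
     }
 }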
Example #51
 /// <summary>
 /// Invoked on the <see cref="AtomicReader"/> for the newly
 ///  merged segment, before that segment is made visible
 ///  to near-real-time readers.
 /// </summary>
 public abstract void Warm(AtomicReader reader);
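A hedged sketch of a warmer; the IndexWriter.IndexReaderWarmer base type and the SetMergedSegmentWarmer hook are assumed from the Java API. It touches each document's term vectors so the first near-real-time search against the merged segment is not cold.

 private sealed class TermVectorWarmer : IndexWriter.IndexReaderWarmer // assumed base type
 {
     public override void Warm(AtomicReader reader)
     {
         for (int docID = 0; docID < reader.MaxDoc; docID++)
         {
             reader.GetTermVectors(docID); // pull vectors (if any) into the OS/file-system cache
         }
     }
 }
 // installed via (assumed): indexWriterConfig.SetMergedSegmentWarmer(new TermVectorWarmer());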
 public FilterAtomicReaderAnonymousInnerClassHelper(TestIndexReaderClose outerInstance, AtomicReader wrap, bool throwOnClose)
     : base(wrap)
 {
     this.OuterInstance = outerInstance;
     this.ThrowOnClose = throwOnClose;
 }
 public FakeDeleteAtomicIndexReader(AtomicReader reader) : base(reader)
 {
     UndeleteAll(); // initialize main bitset
 }
Example #54
 /// <summary>
 /// Inverts only terms starting w/ prefix, and only terms
 /// whose docFreq (not taking deletions into account) is
 /// &lt;=  <paramref name="maxTermDocFreq"/>, with a custom indexing interval
 ///  (default is every 128th term).
 /// </summary>
 public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits)
     : this(field, maxTermDocFreq, indexIntervalBits)
 {
     Uninvert(reader, liveDocs, termPrefix);
 }
        public virtual void TestRandom()
        {
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                {
                    string text;
                    if (Random.NextBoolean())
                    {
                        text = "a";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "b";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "c";
                    }
                    else
                    {
                        text = "d";
                    }

                    int posIncr = Random.NextBoolean() ? 1 : Random.Next(5);
                    if (tokenCount == 0 && posIncr == 0)
                    {
                        posIncr = 1;
                    }
                    int offIncr     = Random.NextBoolean() ? 0 : Random.Next(5);
                    int tokenOffset = Random.Next(5);

                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    if (!actualTokens.TryGetValue(text, out IDictionary <int?, IList <Token> > postingsByDoc))
                    {
                        actualTokens[text] = postingsByDoc = new Dictionary <int?, IList <Token> >();
                    }
                    if (!postingsByDoc.TryGetValue(docCount, out IList <Token> postings))
                    {
                        postingsByDoc[docCount] = postings = new List <Token>();
                    }
                    postings.Add(token);
                    tokens.Add(token);
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                }
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.GetTerms("content").GetIterator(null);
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                FieldCache.Int32s    docIDToID                  = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                foreach (string term in terms)
                {
                    //System.out.println("  term=" + term);
                    if (termsEnum.SeekExact(new BytesRef(term)))
                    {
                        docs = termsEnum.Docs(null, docs);
                        Assert.IsNotNull(docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docs.Freq);
                        }

                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsFlags.PAYLOADS);
                        Assert.IsNotNull(docsAndPositions);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());
                            }
                        }

                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        Assert.IsNotNull(docsAndPositionsAndOffsets);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset);
                                Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset);
                            }
                        }
                    }
                }
                // TODO: test advance:
            }
            r.Dispose();
            dir.Dispose();
        }
        public virtual void Test()
        {
            IList<string>  postingsList   = new List<string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                //numTerms /= 2;
                // LUCENENET specific - To keep this under the 1 hour free limit
                // of Azure DevOps, this was reduced from /2 to /6.
                numTerms /= 6;
            }

            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i, CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList.Shuffle(Random);

            ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);

            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + iw.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadJob[]    threads     = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousClass(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.GetReader();

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
#pragma warning disable 612, 618
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (iwc.Codec is Lucene3xCodec == false)
#pragma warning restore 612, 618
            {
                Assert.AreEqual(numTerms - 1, terms.Count);
            }
            TermsEnum termsEnum = terms.GetEnumerator();
            while (termsEnum.MoveNext())
            {
                int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.DocFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void Test()
        {
            IList<string>  postingsList   = new List<string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we don't actually need positions
                fieldType.StoreTermVectors = true;                                  // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Size());
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// Inverts only terms starting w/ prefix </summary>
 public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix)
     : this(reader, liveDocs, field, termPrefix, int.MaxValue)
 {
 }
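Below is a minimal usage sketch for the prefix-limited constructor above. The directory, the "category" field, and the "sports/" prefix are illustrative assumptions rather than values taken from the surrounding tests; the point is that only terms starting with the given prefix are inverted, and because this overload delegates with int.MaxValue as the term doc-frequency cutoff, no term is skipped for being too frequent.

 // Hypothetical setup: 'dir' is a Directory that already holds an index.
 AtomicReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
 // Build per-document ordinals only for "category" terms beginning with "sports/";
 // every other term in the field is ignored.
 DocTermOrds ords = new DocTermOrds(reader, reader.LiveDocs, "category", new BytesRef("sports/"));

Restricting inversion to a prefix keeps the ordinal table small when only a slice of a large field is of interest.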
 public AllDeletedFilterReader(AtomicReader @in)
     : base(@in)
 {
     LiveDocs_Renamed = new Bits_MatchNoBits(@in.MaxDoc);
     Debug.Assert(MaxDoc == 0 || HasDeletions);
 }
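A hedged sketch of how this wrapper behaves; originalReader below is assumed to be any already-open AtomicReader. The delegate's MaxDoc is passed through unchanged while the live-docs bits match nothing, so every document appears deleted, which is useful for exercising code paths that must tolerate fully-deleted segments.

 AtomicReader wrapped = new AllDeletedFilterReader(originalReader); // originalReader: any open AtomicReader (assumed)
 Assert.AreEqual(originalReader.MaxDoc, wrapped.MaxDoc); // document numbers are preserved
 Assert.AreEqual(0, wrapped.NumDocs);                    // but none of the documents are live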
        public void TestNumericField()
        {
            Directory dir     = NewDirectory();
            var       w       = new RandomIndexWriter(Random(), dir);
            var       numDocs = AtLeast(500);
            var       answers = new object[numDocs];

            FieldType.NumericType[] typeAnswers = new FieldType.NumericType[numDocs];
            for (int id = 0; id < numDocs; id++)
            {
                Document doc = new Document();
                Field    nf;
                Field    sf;
                object   answer;
                FieldType.NumericType typeAnswer;
                if (Random().NextBoolean())
                {
                    // float/double
                    if (Random().NextBoolean())
                    {
                        float f = Random().NextFloat();
                        answer     = Convert.ToSingle(f);
                        nf         = new FloatField("nf", f, Field.Store.NO);
                        sf         = new StoredField("nf", f);
                        typeAnswer = FieldType.NumericType.FLOAT;
                    }
                    else
                    {
                        double d = Random().NextDouble();
                        answer     = Convert.ToDouble(d);
                        nf         = new DoubleField("nf", d, Field.Store.NO);
                        sf         = new StoredField("nf", d);
                        typeAnswer = FieldType.NumericType.DOUBLE;
                    }
                }
                else
                {
                    // int/long
                    if (Random().NextBoolean())
                    {
                        int i = Random().Next();
                        answer     = Convert.ToInt32(i);
                        nf         = new IntField("nf", i, Field.Store.NO);
                        sf         = new StoredField("nf", i);
                        typeAnswer = FieldType.NumericType.INT;
                    }
                    else
                    {
                        long l = Random().NextLong();
                        answer     = Convert.ToInt64(l);
                        nf         = new LongField("nf", l, Field.Store.NO);
                        sf         = new StoredField("nf", l);
                        typeAnswer = FieldType.NumericType.LONG;
                    }
                }
                doc.Add(nf);
                doc.Add(sf);
                answers[id]     = answer;
                typeAnswers[id] = typeAnswer;
                FieldType ft = new FieldType(IntField.TYPE_STORED);
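                // int.MaxValue precision step indexes the id as a single term (no extra trie terms for range queries)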
                ft.NumericPrecisionStep = int.MaxValue;
                doc.Add(new IntField("id", id, ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.Reader;

            w.Dispose();

            Assert.AreEqual(numDocs, r.NumDocs);

            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                AtomicReader    sub = (AtomicReader)ctx.Reader;
                FieldCache.Ints ids = FieldCache.DEFAULT.GetInts(sub, "id", false);
                for (int docID = 0; docID < sub.NumDocs; docID++)
                {
                    Document doc = sub.Document(docID);
                    Field    f   = (Field)doc.GetField("nf");
                    Assert.IsTrue(f is StoredField, "got f=" + f);
                    Assert.AreEqual(answers[ids.Get(docID)], f.NumericValue);
                }
            }
            r.Dispose();
            dir.Dispose();
        }