// NOTE(review): the return expression below ends on a trailing comma; its remaining
// argument(s), the closing ");" and the method braces are not present in this listing.
// Restore them from the original file before compiling.
/// <summary>
/// Builds a MonotonicBlockPackedReader over a clone of <paramref name="data"/>, configured
/// from the PackedIntsVersion, BlockSize and Count of <paramref name="entry"/>.
/// <paramref name="field"/> is not referenced in the visible lines.
/// </summary>
protected override MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field,
     NumericEntry entry)
     return new MonotonicBlockPackedReader((IndexInput)data.Clone(), entry.PackedIntsVersion, entry.BlockSize, entry.Count,
 /// <summary>
 /// Routes a field's doc-values payload to the writer that matches its doc-values type.
 /// Fields that declare no doc-values type are ignored.
 /// </summary>
 /// <param name="docID">Document the value belongs to.</param>
 /// <param name="field">Field supplying the doc-values type and the binary or numeric value.</param>
 /// <param name="fieldInfo">Receives the detected doc-values type and is handed to the type-specific writer.</param>
 /// <exception cref="System.ArgumentException">The field is NUMERIC but its value is not a long.</exception>
 public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
 {
     DocValuesType_e? dvType = field.FieldType().DocValueType;
     if (dvType == null)
     {
         return;
     }

     fieldInfo.DocValuesType = dvType;
     if (dvType == DocValuesType_e.BINARY)
     {
         AddBinaryField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.SORTED)
     {
         AddSortedField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.SORTED_SET)
     {
         AddSortedSetField(fieldInfo, docID, field.BinaryValue());
     }
     else if (dvType == DocValuesType_e.NUMERIC)
     {
         object numericValue = field.NumericValue;
         if (!(numericValue is long?))
         {
             // A missing value must still surface as ArgumentException, not as a
             // NullReferenceException raised while the message is being built.
             string actualType = numericValue == null ? "null" : numericValue.GetType().ToString();
             throw new System.ArgumentException("illegal type " + actualType + ": DocValues types must be Long");
         }
         AddNumericField(fieldInfo, docID, (long)numericValue);
     }
     else
     {
         // Only reached for a doc-values type this method does not know about.
         Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
     }
 }
 /// <summary>
 /// Stores the owning per-thread writer and the field, and picks up the shared
 /// document state from <paramref name="perThread"/> and the field state from
 /// <paramref name="docInverterPerField"/>.
 /// </summary>
 public NormsWriterPerField(DocInverterPerField docInverterPerField, NormsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
 }
		/// <summary>
		/// Points this instance at <paramref name="fieldInfo"/> and caches its
		/// omitTermFreqAndPositions and storePayloads settings.
		/// </summary>
		internal void SetField(FieldInfo fieldInfo)
		{
			this.fieldInfo = fieldInfo;
			omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
			storePayloads = fieldInfo.storePayloads;
		}
		// NOTE(review): this listing has no braces or else keywords, so the branch structure has
		// to be read from the indentation and the "enabled"/"disabled" comments. In the lines
		// shown, `bits` is computed but never written, `fi` is never read, and an uncompressed
		// non-binary field writes nothing — confirm against the original file whether
		// statements are missing.
		/// <summary>
		/// Derives the tokenized/binary/compressed flag bits for <paramref name="field"/> and writes
		/// the field's bytes to fieldsStream, compressing them first unless they are already
		/// compressed (merge mode).
		/// </summary>
		internal void  WriteField(FieldInfo fi, Fieldable field)
			// if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
			// and field.BinaryValue() already returns the compressed value for a field
			// with IsCompressed()==true, so we disable compression in that case
			bool disableCompression = (field is FieldsReader.FieldForMerge);
			// flag byte: one bit each for tokenized, binary and compressed
			byte bits = 0;
			if (field.IsTokenized())
				bits |= FieldsWriter.FIELD_IS_TOKENIZED;
			if (field.IsBinary())
				bits |= FieldsWriter.FIELD_IS_BINARY;
			if (field.IsCompressed())
				bits |= FieldsWriter.FIELD_IS_COMPRESSED;
			if (field.IsCompressed())
				// compression is enabled for the current field
				byte[] data = null;
				if (disableCompression)
					// optimized case for merging, the data
					// is already compressed
					data = field.BinaryValue();
					// check if it is a binary field
					if (field.IsBinary())
						data = Compress(field.BinaryValue());
						// non-binary value: compress the UTF-8 bytes of the string value
						data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
				int len = data.Length;
				fieldsStream.WriteBytes(data, len);
				// compression is disabled for the current field
				if (field.IsBinary())
					byte[] data = field.BinaryValue();
					int len = data.Length;
					fieldsStream.WriteBytes(data, len);
 // NOTE(review): the return expression below ends on a trailing comma; its remaining
 // argument(s), the closing ");" and the method braces are not present in this listing.
 // Restore them from the original file before compiling.
 /// <summary>
 /// Builds a MonotonicBlockPackedReader over a clone of <paramref name="data"/>, configured
 /// from the PackedIntsVersion, BlockSize and Count of <paramref name="bytes"/>.
 /// <paramref name="field"/> is not referenced in the visible lines.
 /// </summary>
 protected override MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field,
     BinaryEntry bytes)
     return new MonotonicBlockPackedReader((IndexInput)data.Clone(), bytes.PackedIntsVersion, bytes.BlockSize, bytes.Count,
 /// <summary>
 /// Creates the per-field inverter: shares the parent's document state, creates a fresh
 /// FieldInvertState named after the field, and asks the parent's consumer and end consumer
 /// for their per-field counterparts.
 /// </summary>
 public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo)
 {
     this.fieldInfo = fieldInfo;
     DocState = parent.DocState;
     FieldState = new FieldInvertState(fieldInfo.Name);
     // Both AddField calls receive `this`, so the state above is assigned first.
     this.Consumer = parent.Consumer.AddField(this, fieldInfo);
     this.EndConsumer = parent.EndConsumer.AddField(this, fieldInfo);
 }
        internal bool HasPayloads; // if enabled, and we actually saw any for this field

        /// <summary>
        /// Stores the collaborating objects and shares the document and field state
        /// of <paramref name="termsHashPerField"/>.
        /// </summary>
        public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo)
        {
            this.TermsHashPerField = termsHashPerField;
            this.TermsWriter = termsWriter;
            this.FieldInfo = fieldInfo;
            DocState = termsHashPerField.DocState;
            FieldState = termsHashPerField.FieldState;
        }
 /// <summary>
 /// Sets up the term hash (backed by a byte pool whose allocator tracks into
 /// <paramref name="iwBytesUsed"/>) and the pending packed buffer, and records the
 /// buffer's initial RAM usage in BytesUsed.
 /// </summary>
 public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;
     Hash = new BytesRefHash(
         new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
         BytesRefHash.DEFAULT_CAPACITY,
         new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     // NOTE(review): BytesUsed is recorded here but not reported to iwBytesUsed in this
     // constructor — confirm the counter is updated elsewhere.
     BytesUsed = Pending.RamBytesUsed();
 }
 /// <summary>
 /// Creates the pending value buffer and, when <paramref name="trackDocsWithField"/> is set,
 /// a 64-bit FixedBitSet for the documents that have a value; records the initial RAM usage
 /// of both in BytesUsed.
 /// </summary>
 public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, bool trackDocsWithField)
 {
     Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     DocsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
     // Computed after Pending and DocsWithField are assigned, as in the original order.
     // NOTE(review): BytesUsed is not reported to iwBytesUsed in this constructor —
     // confirm the counter is updated elsewhere.
     BytesUsed = Pending.RamBytesUsed() + DocsWithFieldBytesUsed();
     this.FieldInfo = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;
 }
 /// <summary>
 /// Creates the per-field inverter: shares the document and field state of
 /// <paramref name="perThread"/> and asks its consumer and end consumer for their
 /// per-field counterparts.
 /// </summary>
 public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = perThread.docState;
     fieldState = perThread.fieldState;
     // Both addField calls receive `this`, so the state above is assigned first.
     this.consumer = perThread.consumer.addField(this, fieldInfo);
     this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
 }
 /// <summary>
 /// Stores the collaborating objects, shares the document and field state of
 /// <paramref name="termsHashPerField"/>, and caches the field's
 /// omitTermFreqAndPositions setting.
 /// </summary>
 public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
     omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 }
 /// <summary>
 /// Stores the collaborating objects (including the terms writer owned by
 /// <paramref name="perThread"/>) and shares the document and field state of
 /// <paramref name="termsHashPerField"/>.
 /// </summary>
 public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread = perThread;
     this.termsWriter = perThread.termsWriter;
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
 }
 /// <summary>
 /// Creates the paged byte store and its output, the packed buffer of value lengths and a
 /// 64-bit FixedBitSet for documents that have a value; records the initial bit-set RAM
 /// usage in BytesUsed.
 /// </summary>
 public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo = fieldInfo;
     this.Bytes = new PagedBytes(BLOCK_BITS);
     this.BytesOut = Bytes.DataOutput;
     this.Lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     this.IwBytesUsed = iwBytesUsed;
     this.DocsWithField = new FixedBitSet(64);
     // Computed after DocsWithField is assigned, as in the original order.
     // NOTE(review): BytesUsed is not reported to iwBytesUsed in this constructor —
     // confirm the counter is updated elsewhere.
     this.BytesUsed = DocsWithFieldBytesUsed();
 }
		/// <summary>
		/// Writes <paramref name="field"/> through localFieldsWriter, first obtaining and
		/// initialising the per-document buffer when none is active yet.
		/// </summary>
		public void AddField(Fieldable field, FieldInfo fieldInfo)
		{
			if (doc == null)
			{
				doc = storedFieldsWriter.GetPerDoc();
				doc.docID = docState.docID;
				// A freshly obtained per-doc buffer is expected to be empty.
				System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
				System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
				System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
			}
			localFieldsWriter.WriteField(fieldInfo, field);
		}
 /// <summary>
 /// Wires this per-field hash to the pools and document state of
 /// <paramref name="perThread"/>, obtains its consumer, derives the stream counts, and
 /// chains to the next per-field hash when <paramref name="nextPerThread"/> is supplied.
 /// </summary>
 public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHashPerThread perThread, TermsHashPerThread nextPerThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     intPool = perThread.intPool;
     charPool = perThread.charPool;
     bytePool = perThread.bytePool;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
     this.consumer = perThread.consumer.addField(this, fieldInfo);
     streamCount = consumer.getStreamCount();
     numPostingInt = 2 * streamCount;
     this.fieldInfo = fieldInfo;
     if (nextPerThread != null)
     {
         nextPerField = (TermsHashPerField)nextPerThread.addField(docInverterPerField, fieldInfo);
     }
     else
     {
         // No downstream hash: leave the chain terminated.
         nextPerField = null;
     }
 }
        /// <summary>
        /// Creates an IndexableField whose value will be lazy loaded if and 
        /// when it is used. 
        /// <para>
        /// <b>NOTE:</b> This method must be called once for each value of the field 
        /// name specified in sequence that the values exist.  This method may not be 
        /// used to generate multiple, lazy, IndexableField instances referring to 
        /// the same underlying IndexableField instance.
        /// </para>
        /// <para>
        /// The lazy loading of field values from all instances of IndexableField 
        /// objects returned by this method are all backed by a single Document 
        /// per LazyDocument instance.
        /// </para>
        /// </summary>
        /// <param name="fieldInfo">Supplies the field name and number of the lazy field to create.</param>
        /// <returns>The newly created lazy field.</returns>
        public virtual IndexableField GetField(FieldInfo fieldInfo)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            IList<LazyField> values;
            if (!fields.TryGetValue(fieldInfo.Number, out values) || values == null)
            {
                values = new List<LazyField>();
                fields[fieldInfo.Number] = values;
            }

            LazyField value = new LazyField(this, fieldInfo.Name, fieldInfo.Number);
            // Record the new instance under its field number; without this the list
            // registered above would stay empty.
            values.Add(value);

            lock (this)
            {
                // edge case: if someone asks this LazyDoc for more LazyFields
                // after other LazyFields from the same LazyDoc have been
                // actualized, we need to force the doc to be re-fetched
                // so the new LazyFields are also populated.
                doc = null;
            }
            return value;
        }
Beispiel #18
 /// <summary>
 /// Wires this per-field hash to the pools, document state and byte counter of
 /// <paramref name="termsHash"/>, obtains its consumer, creates the term hash, derives the
 /// stream counts, and chains to the next per-field hash when
 /// <paramref name="nextTermsHash"/> is supplied.
 /// </summary>
 public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
 {
     IntPool = termsHash.IntPool;
     BytePool = termsHash.BytePool;
     TermBytePool = termsHash.TermBytePool;
     DocState = termsHash.DocState;
     this.TermsHash = termsHash;
     BytesUsed = termsHash.BytesUsed;
     FieldState = docInverterPerField.FieldState;
     this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
     PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
     BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
     StreamCount = Consumer.StreamCount;
     NumPostingInt = 2 * StreamCount;
     this.FieldInfo = fieldInfo;
     if (nextTermsHash != null)
     {
         NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
     }
     else
     {
         // No downstream hash: leave the chain terminated.
         NextPerField = null;
     }
 }
Beispiel #19
        // NOTE(review): this listing is damaged. Every "new Field(," is missing its first
        // argument (presumably the field name from `fi` — confirm), there are no braces or
        // else keywords, and `f` is assigned but never used in the lines shown. Restore the
        // method from the original file before compiling.
        /// <summary>
        /// Reads one stored field value from fieldsStream. A binary value is read as a
        /// length-prefixed byte block (uncompressed first when <paramref name="compressed"/> is set)
        /// and added to <paramref name="doc"/>; otherwise a string-valued Field is built from the
        /// index and term-vector settings of <paramref name="fi"/>.
        /// </summary>
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
            //we have a binary stored field, and it may be compressed
            if (binary)
                int toRead = fieldsStream.ReadVInt();
                var b = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                doc.Add(compressed ? new Field(, Uncompress(b), Field.Store.YES) : new Field(, b, Field.Store.YES));
                // non-binary value (judging by the indentation, this is the other branch of the test above)
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                    int toRead = fieldsStream.ReadVInt();

                    var b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    // compressed string: uncompress, then decode the bytes as UTF-8
                    f = new Field(, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
                                  termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
                    // plain string read directly from the stream
                    f = new Field(, false, fieldsStream.ReadString(), store, index, termVector)
                            {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};

Beispiel #20
 /// <summary>
 /// Creates the NormsConsumerPerField for the given field, owned by this consumer.
 /// </summary>
 internal override InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo)
 {
     return new NormsConsumerPerField(docInverterPerField, fieldInfo, this);
 }
Beispiel #21
        // NOTE(review): this listing is damaged. Every "new LazyField(this,," is missing its
        // second argument (presumably the field name from `fi` — confirm), there are no braces
        // or else keywords, and `f` is assigned but never used in the lines shown. Restore the
        // method from the original file before compiling.
        /// <summary>
        /// Records a stored field as a LazyField that remembers the value's length and its
        /// position in fieldsStream, then moves the stream past the value instead of reading it.
        /// </summary>
        private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
            if (binary)
                int toRead = fieldsStream.ReadVInt();
                long pointer = fieldsStream.FilePointer;
                //was: doc.add(new Fieldable(, b, Fieldable.Store.YES));
                doc.Add(new LazyField(this,, Field.Store.YES, toRead, pointer, binary, compressed));

                //Need to move the pointer ahead by toRead positions
                fieldsStream.Seek(pointer + toRead);
                // non-binary value (judging by the indentation, this is the other branch of the test above)
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                    int toRead = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    f = new LazyField(this,, store, toRead, pointer, binary, compressed);
                    //skip over the part that we aren't loading
                    fieldsStream.Seek(pointer + toRead);
                    f.OmitNorms = fi.omitNorms;
                    f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions;
                    // uncompressed string value
                    int length = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    //Skip ahead of where we are by the length of what is stored
                    // NOTE(review): only the format >= FORMAT_VERSION_UTF8_LENGTH_IN_BYTES case is
                    // handled in the lines shown; nothing skips the value for older formats.
                    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                        fieldsStream.Seek(pointer + length);
                    f = new LazyField(this,, store, index, termVector, length, pointer, binary, compressed)
                            {OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions};

		/// <summary>
		/// Creates the DocInverterPerField for the given field, owned by this instance.
		/// </summary>
		public override DocFieldConsumerPerField AddField(FieldInfo fi)
		{
			return new DocInverterPerField(this, fi);
		}
Beispiel #23
		/// <summary>
		/// Creates a FieldInfo for the interned <paramref name="name"/>, numbers it with the
		/// current size of byNumber, and registers it in both byNumber and byName.
		/// </summary>
		/// <returns>The newly registered FieldInfo.</returns>
		private FieldInfo AddInternal(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
		{
			name = StringHelper.Intern(name);
			FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			// The field number above is byNumber.Count, so the entry must be appended;
			// otherwise every later field would be given the same number.
			byNumber.Add(fi);
			byName[name] = fi;
			return fi;
		}
 /// <summary>
 /// Forwards the field to both wrapped consumers, First and then Second.
 /// </summary>
 public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
 {
     First.AddField(docID, field, fieldInfo);
     Second.AddField(docID, field, fieldInfo);
 }
		/// <summary>
		/// Returns the per-field consumer for the given field.
		/// </summary>
		/// <param name="termsHashPerField">Per-field terms hash the consumer is being requested for.</param>
		/// <param name="fieldInfo">Field the consumer is being requested for.</param>
		public abstract TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
 /// <summary>
 /// Obtains the per-field consumer from the consumer of <paramref name="perThread"/>
 /// and remembers the field.
 /// </summary>
 public DocFieldProcessorPerField(DocFieldProcessorPerThread perThread, FieldInfo fieldInfo)
 {
     this.consumer = perThread.consumer.AddField(fieldInfo);
     this.fieldInfo = fieldInfo;
 }
Beispiel #27
        // NOTE(review): this listing has no braces or else keywords, so the branch structure has
        // to be read from the indentation and the "enabled"/"disabled" comments. In the lines
        // shown, `bits` is computed but never written, `fi` is never read, and an uncompressed
        // non-binary field writes nothing — confirm against the original file whether
        // statements are missing.
        /// <summary>
        /// Derives the tokenized/binary/compressed flag bits for <paramref name="field"/> and writes
        /// the field's bytes (offset and length taken from the field, or from the freshly compressed
        /// buffer) to fieldsStream.
        /// </summary>
        internal void  WriteField(FieldInfo fi, Fieldable field)
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.GetBinaryValue() already returns the compressed value for a field
            // with IsCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            // flag byte: one bit each for tokenized, binary and compressed
            byte bits = 0;

            if (field.IsTokenized())
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            if (field.IsBinary())
                bits |= FieldsWriter.FIELD_IS_BINARY;
            if (field.IsCompressed())
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;


            if (field.IsCompressed())
                // compression is enabled for the current field
                byte[] data;
                int    len;
                int    offset;
                if (disableCompression)
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                    // check if it is a binary field
                    if (field.IsBinary())
                        data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                        // non-binary value: compress the encoded bytes of the string value
                        byte[] x = defaultEncoding.GetBytes(field.StringValue());
                        data = CompressionTools.Compress(x, 0, x.Length);
                    // a freshly compressed buffer is written in full, starting at 0
                    len    = data.Length;
                    offset = 0;

                fieldsStream.WriteBytes(data, offset, len);
                // compression is disabled for the current field
                if (field.IsBinary())
                    byte[] data;
                    int    len;
                    int    offset;
                    data   = field.GetBinaryValue();
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();

                    fieldsStream.WriteBytes(data, offset, len);
			/// <summary>
			/// Builds a stored field for merging: keeps the raw value and its
			/// binary/compressed/tokenized flags, and copies the name, indexing and
			/// term-vector settings from <paramref name="fi"/>.
			/// </summary>
			public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
			{
				this.isStored = true;
				this.fieldsData = value_Renamed;
				this.isCompressed = compressed;
				this.isBinary = binary;
				if (binary)
				{
					// For binary fields the value is a byte[]; remember its length.
					binaryLength = ((byte[]) value_Renamed).Length;
				}

				this.isTokenized = tokenize;

				// Field names are interned.
				this.name = StringHelper.Intern(fi.name);
				this.isIndexed = fi.isIndexed;
				this.omitNorms = fi.omitNorms;
				this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
				this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
				this.storePositionWithTermVector = fi.storePositionWithTermVector;
				this.storeTermVector = fi.storeTermVector;
			}
		/// <summary>
		/// Maps the field's indexed flag and the tokenize flag to a Field.Index value:
		/// NO when the field is not indexed, otherwise ANALYZED or NOT_ANALYZED.
		/// </summary>
		private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
		{
			if (!fi.isIndexed)
			{
				return Field.Index.NO;
			}

			return tokenize ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
		}
		/// <summary>
		/// Maps the field's term-vector flags to a Field.TermVector value: NO when term
		/// vectors are not stored, otherwise YES, WITH_POSITIONS, WITH_OFFSETS or
		/// WITH_POSITIONS_OFFSETS according to the offset and position flags.
		/// </summary>
		private Field.TermVector GetTermVectorType(FieldInfo fi)
		{
			if (!fi.storeTermVector)
			{
				return Field.TermVector.NO;
			}

			if (fi.storeOffsetWithTermVector)
			{
				return fi.storePositionWithTermVector
					? Field.TermVector.WITH_POSITIONS_OFFSETS
					: Field.TermVector.WITH_OFFSETS;
			}

			return fi.storePositionWithTermVector
				? Field.TermVector.WITH_POSITIONS
				: Field.TermVector.YES;
		}
Beispiel #31
        // NOTE(review): this listing has lost its braces and, apparently, several one-word
        // statements: the "No terms" / "No terms match" tests below have no statement under
        // them, the two "for (; ;)" loops show no exit statement, a catch appears without a
        // try, and several else keywords are absent. Read the structure from the indentation
        // and restore the method from the original file before compiling.
        /// <summary>
        /// Builds the un-inverted form of <c>m_field</c> from <paramref name="reader"/>: for every
        /// document, the list of term numbers it contains, restricted to terms that start with
        /// <paramref name="termPrefix"/> when it is non-null.
        /// Call this only once (if you subclass!) </summary>
        /// <exception cref="InvalidOperationException">The field has doc values, or the term
        /// number data for one pass needs more than the 24 bits available for its array index.</exception>
        protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
            FieldInfo info = reader.FieldInfos.FieldInfo(m_field);

            if (info != null && info.HasDocValues)
                throw new InvalidOperationException("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            // Timing reference for m_total_time / m_phase1_time, in Environment.TickCount milliseconds.
            long startTime = Environment.TickCount;

            m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
                // No terms
            Terms terms = fields.GetTerms(m_field);

            if (terms == null)
                // No terms

            TermsEnum te        = terms.GetEnumerator();
            BytesRef  seekStart = termPrefix ?? new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
                // No terms match

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            // enumerate all terms, and build an intermediate form of the un-inverted field.
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in it's byte array (for faster
            // appending and faster creation of the final form).
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            m_docsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
                BytesRef t = te.Term;
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                // Done once (testedOrd is set below): probe whether the TermsEnum exposes ordinals.
                if (!testedOrd)
                        m_ordBase = (int)te.Ord;
                        //System.out.println("got ordBase=" + ordBase);
                    catch (NotSupportedException) // LUCENENET: IDE0059: Remove unnecessary value assignment
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    testedOrd = true;

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
                    // Index this term
                    m_sizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes

                // Only terms whose document frequency is at most m_maxTermDocFreq are un-inverted below.
                int df = te.DocFreq;
                if (df <= m_maxTermDocFreq)
                    m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                        int doc = m_docsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        //System.out.println("  chunk=" + chunk + " docs");


                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = (int)((uint)val >> 8);
                            int ilen   = VInt32Size(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            pos        = WriteInt32(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                                ipos = 0;
                            else if ((val & 0x0000ff80) == 0)
                                ipos = 1;
                            else if ((val & 0x00ff8000) == 0)
                                ipos = 2;
                            else if ((val & 0xff800000) == 0)
                                ipos = 3;
                                ipos = 4;

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt32(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                index[doc] = val;
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                    tempArr[j] = (sbyte)val;
                                    val        = (int)((uint)val >> 8);
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                    SetActualDocFreq(termNum, actualDF);

                if (!te.MoveNext())

            m_numTermsInField = termNum;

            // End of phase 1 (term enumeration); used for m_phase1_time below.
            long midPoint = Environment.TickCount;

            if (m_termInstances == 0)
                // we didn't invert anything
                // lower memory consumption.
                m_tnums = null;
                this.m_index = index;

                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.

                for (int pass = 0; pass < 256; pass++)
                    var target = m_tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                        pos = target.Length;
                        target = new sbyte[4096];

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                            //System.out.println("  pass="******" process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                                int len = (int)((uint)val >> 8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                    // we only have 24 bits for the array index
                                    throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + m_field);
                                var arr = bytes[doc];

                                // NOTE(review): the next three lines look like the inside of a block
                                // comment whose opening and closing lines are missing from this listing.
                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                    int newlen = target.Length;

                                    //* we don't have to worry about the array getting too large
                                    // since the "pos" param will overflow first (only 24 bits available)
                                    // if ((newlen<<1) <= 0) {
                                    //  // overflow...
                                    //  newlen = Integer.MAX_VALUE;
                                    //  if (newlen <= pos + len) {
                                    //    throw new SolrException(400,"Too many terms to uninvert field!");
                                    //  }
                                    // } else {
                                    //  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    // }
                                    while (newlen <= pos + len) // doubling strategy
                                        newlen <<= 1;
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator

                    // shrink array
                    if (pos < target.Length)
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;

                    m_tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
            if (indexedTerms != null)
                m_indexedTermsArray = new BytesRef[indexedTerms.Count];
                indexedTerms.CopyTo(m_indexedTermsArray, 0);

            long endTime = Environment.TickCount;

            // Elapsed milliseconds for the whole method and for the term-enumeration phase.
            m_total_time  = (int)(endTime - startTime);
            m_phase1_time = (int)(midPoint - startTime);
Beispiel #32
 // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
 // Read just the size -- caller must skip the field content to continue reading fields
 // Return the size in bytes or chars, depending on field type
 private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
 {
     int size = fieldsStream.ReadVInt();
     // Binary or compressed values are counted in bytes; plain strings in chars of 2 bytes each.
     int bytesize = binary || compressed ? size : 2 * size;

     // Big-endian encoding of bytesize.
     var sizebytes = new byte[4];
     sizebytes[0] = (byte) (Number.URShift(bytesize, 24));
     sizebytes[1] = (byte) (Number.URShift(bytesize, 16));
     sizebytes[2] = (byte) (Number.URShift(bytesize, 8));
     sizebytes[3] = (byte) bytesize;

     // The size is stored under the field's own name.
     doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
     return size;
 }
        internal bool hasPayloads; // if enabled, and we actually saw any for this field

        /// <summary>
        /// Stores the collaborating objects and shares the document and field state
        /// of <paramref name="termsHashPerField"/>.
        /// </summary>
        public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo)
        {
            this.termsHashPerField = termsHashPerField;
            this.termsWriter = termsWriter;
            this.fieldInfo = fieldInfo;
            docState = termsHashPerField.docState;
            fieldState = termsHashPerField.fieldState;
        }