protected override MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field,
     NumericEntry entry)
 {
     data.Seek(entry.Offset);
     return new MonotonicBlockPackedReader((IndexInput)data.Clone(), entry.PackedIntsVersion, entry.BlockSize, entry.Count,
         true);
 }
Example No. 2
 public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
 {
     DocValuesType_e? dvType = field.FieldType().DocValueType;
     if (dvType != null)
     {
         fieldInfo.DocValuesType = dvType;
         if (dvType == DocValuesType_e.BINARY)
         {
             AddBinaryField(fieldInfo, docID, field.BinaryValue());
         }
         else if (dvType == DocValuesType_e.SORTED)
         {
             AddSortedField(fieldInfo, docID, field.BinaryValue());
         }
         else if (dvType == DocValuesType_e.SORTED_SET)
         {
             AddSortedSetField(fieldInfo, docID, field.BinaryValue());
         }
         else if (dvType == DocValuesType_e.NUMERIC)
         {
             if (!(field.NumericValue is long?))
             {
                 throw new System.ArgumentException("illegal type " + field.NumericValue.GetType() + ": DocValues types must be Long");
             }
             AddNumericField(fieldInfo, docID, (long)field.NumericValue);
         }
         else
         {
             Debug.Assert(false, "unrecognized DocValues.Type: " + dvType);
         }
     }
 }
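For context, a minimal sketch (not part of the example above; field names are made up) of the document-side code that ends up routed through this dispatch. Each of the standard Lucene.NET doc-values field classes maps to one branch of the if/else chain:

 using Lucene.Net.Documents;
 using Lucene.Net.Util;

 var doc = new Document();
 doc.Add(new NumericDocValuesField("views", 42L));                     // NUMERIC branch
 doc.Add(new BinaryDocValuesField("thumbnail", new BytesRef("raw")));  // BINARY branch
 doc.Add(new SortedDocValuesField("category", new BytesRef("books"))); // SORTED branch
 doc.Add(new SortedSetDocValuesField("tags", new BytesRef("lucene"))); // SORTED_SET branch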
 public NormsWriterPerField(DocInverterPerField docInverterPerField, NormsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
 }
		internal void  SetField(FieldInfo fieldInfo)
		{
			this.fieldInfo = fieldInfo;
			omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
			storePayloads = fieldInfo.storePayloads;
			posWriter.SetField(fieldInfo);
		}
Example No. 5
		internal void  WriteField(FieldInfo fi, Fieldable field)
		{
			// if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
			// and field.binaryValue() already returns the compressed value for a field
			// with isCompressed()==true, so we disable compression in that case
			bool disableCompression = (field is FieldsReader.FieldForMerge);
			fieldsStream.WriteVInt(fi.number);
			byte bits = 0;
			if (field.IsTokenized())
				bits |= FieldsWriter.FIELD_IS_TOKENIZED;
			if (field.IsBinary())
				bits |= FieldsWriter.FIELD_IS_BINARY;
			if (field.IsCompressed())
				bits |= FieldsWriter.FIELD_IS_COMPRESSED;
			
			fieldsStream.WriteByte(bits);
			
			if (field.IsCompressed())
			{
				// compression is enabled for the current field
				byte[] data = null;
				
				if (disableCompression)
				{
					// optimized case for merging, the data
					// is already compressed
					data = field.BinaryValue();
				}
				else
				{
					// check if it is a binary field
					if (field.IsBinary())
					{
						data = Compress(field.BinaryValue());
					}
					else
					{
						data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
					}
				}
				int len = data.Length;
				fieldsStream.WriteVInt(len);
				fieldsStream.WriteBytes(data, len);
			}
			else
			{
				// compression is disabled for the current field
				if (field.IsBinary())
				{
					byte[] data = field.BinaryValue();
					int len = data.Length;
					fieldsStream.WriteVInt(len);
					fieldsStream.WriteBytes(data, len);
				}
				else
				{
					fieldsStream.WriteString(field.StringValue());
				}
			}
		}
 protected override MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field,
     BinaryEntry bytes)
 {
     data.Seek(bytes.AddressesOffset);
     return new MonotonicBlockPackedReader((IndexInput)data.Clone(), bytes.PackedIntsVersion, bytes.BlockSize, bytes.Count,
         true);
 }
Example No. 7
 public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo)
 {
     this.fieldInfo = fieldInfo;
     DocState = parent.DocState;
     FieldState = new FieldInvertState(fieldInfo.Name);
     this.Consumer = parent.Consumer.AddField(this, fieldInfo);
     this.EndConsumer = parent.EndConsumer.AddField(this, fieldInfo);
 }
        internal bool HasPayloads; // if enabled, and we actually saw any for this field

        public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo)
        {
            this.TermsHashPerField = termsHashPerField;
            this.TermsWriter = termsWriter;
            this.FieldInfo = fieldInfo;
            DocState = termsHashPerField.DocState;
            FieldState = termsHashPerField.FieldState;
        }
 public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;
     Hash = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)), BytesRefHash.DEFAULT_CAPACITY, new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     BytesUsed = Pending.RamBytesUsed();
     iwBytesUsed.AddAndGet(BytesUsed);
 }
Example No. 10
 public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, bool trackDocsWithField)
 {
     Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     DocsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
     BytesUsed = Pending.RamBytesUsed() + DocsWithFieldBytesUsed();
     this.FieldInfo = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;
     iwBytesUsed.AddAndGet(BytesUsed);
 }
 public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = perThread.docState;
     fieldState = perThread.fieldState;
     this.consumer = perThread.consumer.addField(this, fieldInfo);
     this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
 }
 public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
     omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 }
 public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread = perThread;
     this.termsWriter = perThread.termsWriter;
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
 }
Example No. 14
 public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo = fieldInfo;
     this.Bytes = new PagedBytes(BLOCK_BITS);
     this.BytesOut = Bytes.DataOutput;
     this.Lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     this.IwBytesUsed = iwBytesUsed;
     this.DocsWithField = new FixedBitSet(64);
     this.BytesUsed = DocsWithFieldBytesUsed();
     iwBytesUsed.AddAndGet(BytesUsed);
 }
		public void  AddField(Fieldable field, FieldInfo fieldInfo)
		{
			if (doc == null)
			{
				doc = storedFieldsWriter.GetPerDoc();
				doc.docID = docState.docID;
				localFieldsWriter.SetFieldsStream(doc.fdt);
				System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
				System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
				System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
			}
			
			localFieldsWriter.WriteField(fieldInfo, field);
			System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
			doc.numStoredFields++;
		}
 public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHashPerThread perThread, TermsHashPerThread nextPerThread, FieldInfo fieldInfo)
 {
     this.perThread = perThread;
     intPool = perThread.intPool;
     charPool = perThread.charPool;
     bytePool = perThread.bytePool;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
     this.consumer = perThread.consumer.addField(this, fieldInfo);
     streamCount = consumer.getStreamCount();
     numPostingInt = 2 * streamCount;
     this.fieldInfo = fieldInfo;
     if (nextPerThread != null)
         nextPerField = (TermsHashPerField)nextPerThread.addField(docInverterPerField, fieldInfo);
     else
         nextPerField = null;
 }
Example No. 17
        /// <summary>
        /// Creates an IndexableField whose value will be lazy loaded if and 
        /// when it is used. 
        /// <para>
        /// <b>NOTE:</b> This method must be called once for each value of the field 
        /// name specified, in the order in which those values exist.  This method may not be 
        /// used to generate multiple, lazy, IndexableField instances referring to 
        /// the same underlying IndexableField instance.
        /// </para>
        /// <para>
        /// The lazy loading of field values from all IndexableField instances 
        /// returned by this method is backed by a single Document 
        /// per LazyDocument instance.
        /// </para>
        /// </summary>
        public virtual IndexableField GetField(FieldInfo fieldInfo)
        {
            fieldNames.Add(fieldInfo.Name);
            IList<LazyField> values = fields.ContainsKey(fieldInfo.Number) ? fields[fieldInfo.Number] : null;
            if (null == values)
            {
                values = new List<LazyField>();
                fields[fieldInfo.Number] = values;
            }

            LazyField value = new LazyField(this, fieldInfo.Name, fieldInfo.Number);
            values.Add(value);

            lock (this)
            {
                // edge case: if someone asks this LazyDoc for more LazyFields
                // after other LazyFields from the same LazyDoc have been
                // actualized, we need to force the doc to be re-fetched
                // so the new LazyFields are also populated.
                doc = null;
            }
            return value;
        }
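As a rough usage sketch, assuming the LazyDocument(reader, docID) constructor carried over from the Java original (reader is an AtomicReader and docID is a placeholder): each stored value of a field gets its own GetField call, and the single backing Document is only fetched when one of the returned fields is actually read.

            LazyDocument lazyDoc = new LazyDocument(reader, docID);
            FieldInfo titleInfo = reader.FieldInfos.FieldInfo("title");

            // one GetField call per stored value of "title"
            IndexableField lazyTitle = lazyDoc.GetField(titleInfo);

            // the backing Document is fetched here, on first access
            // (the accessor name varies between ports: StringValue vs. GetStringValue())
            string title = lazyTitle.GetStringValue();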
Example No. 18
 public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
 {
     IntPool = termsHash.IntPool;
     BytePool = termsHash.BytePool;
     TermBytePool = termsHash.TermBytePool;
     DocState = termsHash.DocState;
     this.TermsHash = termsHash;
     BytesUsed = termsHash.BytesUsed;
     FieldState = docInverterPerField.FieldState;
     this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
     PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
     BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
     StreamCount = Consumer.StreamCount;
     NumPostingInt = 2 * StreamCount;
     this.FieldInfo = fieldInfo;
     if (nextTermsHash != null)
     {
         NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
     }
     else
     {
         NextPerField = null;
     }
 }
Example No. 19
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            //we have a binary stored field, and it may be compressed
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                var b = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt();

                    var b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
                                  termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
                }
                else
                {
                    f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector)
                            {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
                }

                doc.Add(f);
            }
        }
Example No. 20
 internal override InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo)
 {
     return new NormsConsumerPerField(docInverterPerField, fieldInfo, this);
 }
Example No. 21
        private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                long pointer = fieldsStream.FilePointer;
                //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
                doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary, compressed));

                //Need to move the pointer ahead by toRead positions
                fieldsStream.Seek(pointer + toRead);
            }
            else
            {
                const Field.Store store = Field.Store.YES;
                Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    f = new LazyField(this, fi.name, store, toRead, pointer, binary, compressed);
                    //skip over the part that we aren't loading
                    fieldsStream.Seek(pointer + toRead);
                    f.OmitNorms = fi.omitNorms;
                    f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions;
                }
                else
                {
                    int length = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.FilePointer;
                    //Skip ahead of where we are by the length of what is stored
                    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    {
                        fieldsStream.Seek(pointer + length);
                    }
                    else
                    {
                        fieldsStream.SkipChars(length);
                    }
                    f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary, compressed)
                            {OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions};
                }

                doc.Add(f);
            }
        }
		public override DocFieldConsumerPerField AddField(FieldInfo fi)
		{
			return new DocInverterPerField(this, fi);
		}
Example No. 23
		private FieldInfo AddInternal(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
		{
			name = StringHelper.Intern(name);
			FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			byNumber.Add(fi);
			byName[name] = fi;
			return fi;
		}
 public override void AddField(int docID, IndexableField field, FieldInfo fieldInfo)
 {
     First.AddField(docID, field, fieldInfo);
     Second.AddField(docID, field, fieldInfo);
 }
		abstract public TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
 public DocFieldProcessorPerField(DocFieldProcessorPerThread perThread, FieldInfo fieldInfo)
 {
     this.consumer = perThread.consumer.AddField(fieldInfo);
     this.fieldInfo = fieldInfo;
 }
Example No. 27
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data;
                int    len;
                int    offset;
                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                    }
                    else
                    {
                        byte[] x = defaultEncoding.GetBytes(field.StringValue());
                        data = CompressionTools.Compress(x, 0, x.Length);
                    }
                    len    = data.Length;
                    offset = 0;
                }

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    byte[] data;
                    int    len;
                    int    offset;
                    data   = field.GetBinaryValue();
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();

                    fieldsStream.WriteVInt(len);
                    fieldsStream.WriteBytes(data, offset, len);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
Example No. 28
			public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
			{
				this.isStored = true;
				this.fieldsData = value_Renamed;
				this.isCompressed = compressed;
				this.isBinary = binary;
				if (binary)
					binaryLength = ((byte[]) value_Renamed).Length;
				
				this.isTokenized = tokenize;
				
				this.name = StringHelper.Intern(fi.name);
				this.isIndexed = fi.isIndexed;
				this.omitNorms = fi.omitNorms;
				this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
				this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
				this.storePositionWithTermVector = fi.storePositionWithTermVector;
				this.storeTermVector = fi.storeTermVector;
			}
Example No. 29
		private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
		{
			Field.Index index;
			if (fi.isIndexed && tokenize)
				index = Field.Index.ANALYZED;
			else if (fi.isIndexed && !tokenize)
				index = Field.Index.NOT_ANALYZED;
			else
				index = Field.Index.NO;
			return index;
		}
Example No. 30
		private Field.TermVector GetTermVectorType(FieldInfo fi)
		{
			Field.TermVector termVector = null;
			if (fi.storeTermVector)
			{
				if (fi.storeOffsetWithTermVector)
				{
					if (fi.storePositionWithTermVector)
					{
						termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
					}
					else
					{
						termVector = Field.TermVector.WITH_OFFSETS;
					}
				}
				else if (fi.storePositionWithTermVector)
				{
					termVector = Field.TermVector.WITH_POSITIONS;
				}
				else
				{
					termVector = Field.TermVector.YES;
				}
			}
			else
			{
				termVector = Field.TermVector.NO;
			}
			return termVector;
		}
Example No. 31
        /// <summary>
        /// Call this only once (if you subclass!) </summary>
        protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
        {
            FieldInfo info = reader.FieldInfos.FieldInfo(m_field);

            if (info != null && info.HasDocValues)
            {
                throw new InvalidOperationException("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
            }
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            long startTime = Environment.TickCount;

            m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
            {
                // No terms
                return;
            }
            Terms terms = fields.GetTerms(m_field);

            if (terms == null)
            {
                // No terms
                return;
            }

            TermsEnum te        = terms.GetEnumerator();
            BytesRef  seekStart = termPrefix ?? new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
            {
                // No terms match
                return;
            }

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            //
            // enumerate all terms, and build an intermediate form of the un-inverted field.
            //
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in its byte array (for faster
            // appending and faster creation of the final form).
            //
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            m_docsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
            {
                BytesRef t = te.Term;
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                {
                    break;
                }
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                {
                    try
                    {
                        m_ordBase = (int)te.Ord;
                        //System.out.println("got ordBase=" + ordBase);
                    }
                    catch (NotSupportedException) // LUCENENET: IDE0059: Remove unnecessary value assignment
                    {
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    }
                    testedOrd = true;
                }

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
                {
                    // Index this term
                    m_sizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes
                    indexedTerms.Add(indexedTerm);
                }

                int df = te.DocFreq;
                if (df <= m_maxTermDocFreq)
                {
                    m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                    {
                        int doc = m_docsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        //System.out.println("  chunk=" + chunk + " docs");

                        actualDF++;
                        m_termInstances++;

                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                        {
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = (int)((uint)val >> 8);
                            int ilen   = VInt32Size(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                            {
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?)
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            }
                            pos        = WriteInt32(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                        }
                        else
                        {
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                            {
                                ipos = 0;
                            }
                            else if ((val & 0x0000ff80) == 0)
                            {
                                ipos = 1;
                            }
                            else if ((val & 0x00ff8000) == 0)
                            {
                                ipos = 2;
                            }
                            else if ((val & 0xff800000) == 0)
                            {
                                ipos = 3;
                            }
                            else
                            {
                                ipos = 4;
                            }

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt32(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                            {
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                {
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                }
                                index[doc] = val;
                            }
                            else
                            {
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                {
                                    tempArr[j] = (sbyte)val;
                                    val        = (int)((uint)val >> 8);
                                }
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                            }
                        }
                    }
                    SetActualDocFreq(termNum, actualDF);
                }

                termNum++;
                if (!te.MoveNext())
                {
                    break;
                }
            }

            m_numTermsInField = termNum;

            long midPoint = Environment.TickCount;

            if (m_termInstances == 0)
            {
                // we didn't invert anything
                // lower memory consumption.
                m_tnums = null;
            }
            else
            {
                this.m_index = index;

                //
                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.
                //

                for (int pass = 0; pass < 256; pass++)
                {
                    var target = m_tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                    {
                        pos = target.Length;
                    }
                    else
                    {
                        target = new sbyte[4096];
                    }

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                    {
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                        {
                            //System.out.println("  pass=" + pass + " process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                            {
                                int len = (int)((uint)val >> 8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                {
                                    // we only have 24 bits for the array index
                                    throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + m_field);
                                }
                                var arr = bytes[doc];

                                /*
                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                 */
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                {
                                    int newlen = target.Length;

                                    //* we don't have to worry about the array getting too large
                                    // since the "pos" param will overflow first (only 24 bits available)
                                    // if ((newlen<<1) <= 0) {
                                    //  // overflow...
                                    //  newlen = Integer.MAX_VALUE;
                                    //  if (newlen <= pos + len) {
                                    //    throw new SolrException(400,"Too many terms to uninvert field!");
                                    //  }
                                    // } else {
                                    //  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    // }
                                    //
                                    while (newlen <= pos + len) // doubling strategy
                                    {
                                        newlen <<= 1;
                                    }
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                }
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator
                            }
                        }
                    }

                    // shrink array
                    if (pos < target.Length)
                    {
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;
                    }

                    m_tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
                    {
                        break;
                    }
                }
            }
            if (indexedTerms != null)
            {
                m_indexedTermsArray = new BytesRef[indexedTerms.Count];
                indexedTerms.CopyTo(m_indexedTermsArray, 0);
            }

            long endTime = Environment.TickCount;

            m_total_time  = (int)(endTime - startTime);
            m_phase1_time = (int)(midPoint - startTime);
        }
Example No. 32
 // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
 // Read just the size -- caller must skip the field content to continue reading fields
 // Return the size in bytes or chars, depending on field type
 private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
 {
     int size = fieldsStream.ReadVInt();
     int bytesize = binary || compressed ? size : 2 * size;
     var sizebytes = new byte[4];
     sizebytes[0] = (byte) (Number.URShift(bytesize, 24));
     sizebytes[1] = (byte) (Number.URShift(bytesize, 16));
     sizebytes[2] = (byte) (Number.URShift(bytesize, 8));
     sizebytes[3] = (byte) bytesize;
     doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
     return size;
 }
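To make the size encoding described in the comment above concrete, a small hypothetical helper (not part of Lucene.NET) that decodes the stored 4-byte, high-order-first value back into an int could look like this:

 // decodes the size bytes written by AddFieldSize (high order byte first)
 private static int DecodeFieldSize(byte[] sizebytes)
 {
     return (sizebytes[0] << 24)
          | (sizebytes[1] << 16)
          | (sizebytes[2] << 8)
          |  sizebytes[3];
 }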
        internal bool hasPayloads; // if enabled, and we actually saw any for this field

        public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo)
        {
            this.termsHashPerField = termsHashPerField;
            this.termsWriter       = termsWriter;
            this.fieldInfo         = fieldInfo;
            docState   = termsHashPerField.docState;
            fieldState = termsHashPerField.fieldState;
        }