private BinaryDocValues LoadBinary(FieldInfo field)
{
    BinaryEntry entry = binaries[field.Number];
    data.Seek(entry.offset);
    PagedBytes bytes = new PagedBytes(16);
    bytes.Copy(data, entry.numBytes);
    PagedBytes.Reader bytesReader = bytes.Freeze(true);
    if (entry.minLength == entry.maxLength)
    {
        int fixedLength = entry.minLength;
        ramBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed());
        return new BinaryDocValuesAnonymousInnerClassHelper(this, bytesReader, fixedLength);
    }
    else
    {
        data.Seek(data.FilePointer + entry.missingBytes);
        MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
        ramBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + addresses.RamBytesUsed());
        return new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, addresses);
    }
}
private BinaryDocValues LoadBinary(FieldInfo field)
{
    BinaryEntry entry = binaries[field.Number];
    data.Seek(entry.offset);
    var bytes = new PagedBytes(16);
    bytes.Copy(data, entry.numBytes);
    var bytesReader = bytes.Freeze(true);
    if (entry.minLength == entry.maxLength)
    {
        int fixedLength = entry.minLength;
        ramBytesUsed.AddAndGet(bytes.RamBytesUsed());
        return new BinaryDocValuesAnonymousClass(bytesReader, fixedLength);
    }
    else
    {
        // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        data.Seek(data.Position + entry.missingBytes);
        var addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
        ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addresses.RamBytesUsed());
        return new BinaryDocValuesAnonymousClass2(bytesReader, addresses);
    }
}
private BinaryDocValues LoadBinary(FieldInfo field) { BinaryEntry entry = binaries[field.Number]; data.Seek(entry.offset); var bytes = new PagedBytes(16); bytes.Copy(data, entry.numBytes); var bytesReader = bytes.Freeze(true); if (entry.minLength == entry.maxLength) { int fixedLength = entry.minLength; ramBytesUsed.AddAndGet(bytes.RamBytesUsed()); return(new BinaryDocValuesAnonymousInnerClassHelper(this, bytesReader, fixedLength)); } else { data.Seek(data.GetFilePointer() + entry.missingBytes); var addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false); ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addresses.RamBytesUsed()); return(new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, addresses)); } }
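The LoadBinary variants above all follow the same PagedBytes pattern: copy the on-disk bytes into paged memory once, freeze the buffer, then serve random-access slices from the frozen reader. Below is a minimal sketch of that pattern, assuming the Lucene.NET 4.8 PagedBytes API; the page size and byte values are illustrative only.

using System;
using Lucene.Net.Util;

public static class PagedBytesSketch
{
    public static void Main()
    {
        var bytes = new PagedBytes(4); // 2^4 = 16-byte pages; tiny for the demo
        var output = bytes.GetDataOutput();
        output.WriteBytes(new byte[] { 1, 2, 3, 4, 5 }, 0, 5);

        // Freeze(true) trims the final page and returns an immutable random-access reader.
        PagedBytes.Reader reader = bytes.Freeze(true);

        var slice = new BytesRef();
        reader.FillSlice(slice, 2, 3); // slice now covers the bytes 3, 4, 5
        Console.WriteLine(slice.Length); // 3
    }
}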
private BinaryDocValues LoadBytesFixedStraight(FieldInfo field) { string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); IndexInput input = dir.OpenInput(fileName, state.Context); bool success = false; try { CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT); int fixedLength = input.ReadInt32(); var bytes = new PagedBytes(16); bytes.Copy(input, fixedLength * (long)state.SegmentInfo.DocCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); CodecUtil.CheckEOF(input); success = true; ramBytesUsed.AddAndGet(bytes.RamBytesUsed()); return(new BinaryDocValuesAnonymousClass(fixedLength, bytesReader)); } finally { if (success) { IOUtils.Dispose(input); } else { IOUtils.DisposeWhileHandlingException(input); } } }
/// <summary>
/// Loads the segment information at segment load time.
/// </summary>
/// <param name="indexEnum">The term enum.</param>
/// <param name="indexDivisor">The index divisor.</param>
/// <param name="tiiFileLength">The size of the tii file, used to approximate the size of the buffer.</param>
/// <param name="totalIndexInterval">The total index interval.</param>
public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
{
    this.totalIndexInterval = totalIndexInterval;
    indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor;
    skipInterval = indexEnum.skipInterval;
    // this is only an initial size, it will be GCed once the build is complete
    long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.GetDataOutput();

    int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

    string currentField = null;
    IList<string> fieldStrs = new List<string>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.Next(); i++)
    {
        Term term = indexEnum.Term();
        if (currentField == null || !currentField.Equals(term.Field, StringComparison.Ordinal))
        {
            currentField = term.Field;
            fieldStrs.Add(currentField);
            fieldCounter++;
        }
        TermInfo termInfo = indexEnum.TermInfo();
        indexToTerms.Set(i, dataOutput.GetPosition());
        dataOutput.WriteVInt32(fieldCounter);
        dataOutput.WriteString(term.Text());
        dataOutput.WriteVInt32(termInfo.DocFreq);
        if (termInfo.DocFreq >= skipInterval)
        {
            dataOutput.WriteVInt32(termInfo.SkipOffset);
        }
        dataOutput.WriteVInt64(termInfo.FreqPointer);
        dataOutput.WriteVInt64(termInfo.ProxPointer);
        dataOutput.WriteVInt64(indexEnum.indexPointer);
        for (int j = 1; j < indexDivisor; j++)
        {
            if (!indexEnum.Next())
            {
                break;
            }
        }
    }

    fields = new Term[fieldStrs.Count];
    for (int i = 0; i < fields.Length; i++)
    {
        fields[i] = new Term(fieldStrs[i]);
    }

    dataPagedBytes.Freeze(true);
    dataInput = dataPagedBytes.GetDataInput();
    indexToDataOffset = indexToTerms.Mutable;

    ramBytesUsed = fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term)))
        + dataPagedBytes.RamBytesUsed()
        + indexToDataOffset.RamBytesUsed();
}
/// <summary>
/// Loads the segment information at segment load time.
/// </summary>
/// <param name="indexEnum">the term enum.</param>
/// <param name="indexDivisor">the index divisor.</param>
/// <param name="tiiFileLength">the size of the tii file, used to approximate the size of the buffer.</param>
/// <param name="totalIndexInterval">the total index interval.</param>
public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
{
    this.TotalIndexInterval = totalIndexInterval;
    IndexSize = 1 + ((int)indexEnum.Size - 1) / indexDivisor;
    SkipInterval = indexEnum.SkipInterval;
    // this is only an initial size, it will be GCed once the build is complete
    long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.DataOutput;

    int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, IndexSize, PackedInts.DEFAULT);

    string currentField = null;
    IList<string> fieldStrs = new List<string>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.Next(); i++)
    {
        Term term = indexEnum.Term();
        if (currentField == null || !currentField.Equals(term.Field()))
        {
            currentField = term.Field();
            fieldStrs.Add(currentField);
            fieldCounter++;
        }
        TermInfo termInfo = indexEnum.TermInfo();
        indexToTerms.Set(i, dataOutput.Position);
        dataOutput.WriteVInt(fieldCounter);
        dataOutput.WriteString(term.Text());
        dataOutput.WriteVInt(termInfo.DocFreq);
        if (termInfo.DocFreq >= SkipInterval)
        {
            dataOutput.WriteVInt(termInfo.SkipOffset);
        }
        dataOutput.WriteVLong(termInfo.FreqPointer);
        dataOutput.WriteVLong(termInfo.ProxPointer);
        dataOutput.WriteVLong(indexEnum.IndexPointer);
        for (int j = 1; j < indexDivisor; j++)
        {
            if (!indexEnum.Next())
            {
                break;
            }
        }
    }

    Fields = new Term[fieldStrs.Count];
    for (int i = 0; i < Fields.Length; i++)
    {
        Fields[i] = new Term(fieldStrs[i]);
    }

    dataPagedBytes.Freeze(true);
    DataInput = dataPagedBytes.DataInput;
    IndexToDataOffset = indexToTerms.Mutable;

    RamBytesUsed_Renamed = Fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term)))
        + dataPagedBytes.RamBytesUsed()
        + IndexToDataOffset.RamBytesUsed();
}
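Both constructor versions build the same side structure: for each retained term, the byte offset of its packed entry is recorded in a GrowableWriter, which widens its per-value bit width as larger offsets arrive. A hedged sketch of that offset-index idiom, assuming the Lucene.NET 4.8 packed-ints API (the sample offsets are made up):

using System;
using Lucene.Net.Util.Packed;

public static class OffsetIndexSketch
{
    public static void Main()
    {
        // Offsets grow monotonically, so start with a small bit width and let
        // GrowableWriter reallocate the packed storage as values get larger.
        var offsets = new GrowableWriter(4, 3, PackedInt32s.DEFAULT);
        offsets.Set(0, 0);   // entry 0 starts at byte 0
        offsets.Set(1, 17);  // entry 1 starts at byte 17
        offsets.Set(2, 300); // 300 needs 9 bits, so the writer grows past 4

        PackedInt32s.Mutable packed = offsets.Mutable;
        Console.WriteLine(packed.Get(2)); // 300
    }
}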
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { this.fieldInfo = fieldInfo; this.bytes = new PagedBytes(BLOCK_BITS); this.bytesOut = bytes.GetDataOutput(); this.lengths = new AppendingDeltaPackedInt64Buffer(PackedInt32s.COMPACT); this.iwBytesUsed = iwBytesUsed; this.docsWithField = new FixedBitSet(64); this.bytesUsed = DocsWithFieldBytesUsed(); iwBytesUsed.AddAndGet(bytesUsed); }
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { this.FieldInfo = fieldInfo; this.Bytes = new PagedBytes(BLOCK_BITS); this.BytesOut = Bytes.DataOutput; this.Lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); this.IwBytesUsed = iwBytesUsed; this.DocsWithField = new FixedBitSet(64); this.BytesUsed = DocsWithFieldBytesUsed(); iwBytesUsed.AddAndGet(BytesUsed); }
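Both constructor variants wire together a PagedBytes buffer for the concatenated values, a delta-packed per-document length list, and a FixedBitSet marking documents that have a value. The sketch below shows, in simplified hypothetical form, the append path such a writer needs; it is not the actual Lucene.NET implementation, and it omits the memory accounting the real writer reports to iwBytesUsed via DocsWithFieldBytesUsed:

using Lucene.Net.Store;
using Lucene.Net.Util;
using Lucene.Net.Util.Packed;

public static class BinaryWriterSketch
{
    // Hypothetical append step; parameter names mirror the constructor above.
    public static FixedBitSet AddValue(
        FixedBitSet docsWithField,
        AppendingDeltaPackedInt64Buffer lengths,
        DataOutput bytesOut,
        int docId,
        BytesRef value)
    {
        // Grow the bitset if this doc is beyond its current capacity, then mark it.
        docsWithField = FixedBitSet.EnsureCapacity(docsWithField, docId);
        docsWithField.Set(docId);

        lengths.Add(value.Length);                                    // per-doc length, delta-packed
        bytesOut.WriteBytes(value.Bytes, value.Offset, value.Length); // concatenated payload

        return docsWithField; // may be a reallocated bitset
    }
}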
private SortedDocValues LoadBytesFixedSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index) { CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); int fixedLength = data.ReadInt32(); int valueCount = index.ReadInt32(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, fixedLength * (long)valueCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInt32s.Reader reader = PackedInt32s.GetReader(index); ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed()); return(CorrectBuggyOrds(new SortedDocValuesAnonymousClass(fixedLength, valueCount, bytesReader, reader))); }
private SortedDocValues LoadBytesVarSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index) { CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); long maxAddress = index.ReadInt64(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, maxAddress); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInt32s.Reader addressReader = PackedInt32s.GetReader(index); PackedInt32s.Reader ordsReader = PackedInt32s.GetReader(index); int valueCount = addressReader.Count - 1; ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed()); return(CorrectBuggyOrds(new SortedDocValuesAnonymousClass2(bytesReader, addressReader, ordsReader, valueCount))); }
private SortedDocValues LoadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index) { CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT); long maxAddress = index.ReadLong(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, maxAddress); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInts.Reader addressReader = PackedInts.GetReader(index); PackedInts.Reader ordsReader = PackedInts.GetReader(index); int valueCount = addressReader.Size() - 1; RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed()); return(CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper2(this, bytesReader, addressReader, ordsReader, valueCount))); }
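Both var-sorted loaders read one more address than there are values: value i spans [addresses[i], addresses[i + 1]), which is why valueCount is the address count minus one. A tiny worked sketch of that slicing convention, with a plain long[] standing in for the packed address reader:

using System;

public static class AddressSliceSketch
{
    // addresses has valueCount + 1 entries; the last entry is the end sentinel.
    public static int ValueLength(long[] addresses, int ord)
    {
        return (int)(addresses[ord + 1] - addresses[ord]);
    }

    public static void Main()
    {
        long[] addresses = { 0, 3, 3, 9 }; // 4 addresses => 3 values
        Console.WriteLine(ValueLength(addresses, 0)); // 3
        Console.WriteLine(ValueLength(addresses, 1)); // 0 (empty value)
        Console.WriteLine(ValueLength(addresses, 2)); // 6
    }
}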
private BinaryDocValues LoadBytesFixedDeref(FieldInfo field) { string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx"); IndexInput data = null; IndexInput index = null; bool success = false; try { data = dir.OpenInput(dataName, state.Context); CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT); index = dir.OpenInput(indexName, state.Context); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT); int fixedLength = data.ReadInt32(); int valueCount = index.ReadInt32(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, fixedLength * (long)valueCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInt32s.Reader reader = PackedInt32s.GetReader(index); CodecUtil.CheckEOF(data); CodecUtil.CheckEOF(index); ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed()); success = true; return(new BinaryDocValuesAnonymousClass3(fixedLength, bytesReader, reader)); } finally { if (success) { IOUtils.Dispose(data, index); } else { IOUtils.DisposeWhileHandlingException(data, index); } } }
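The success flag with try/finally in the readers above is the standard Lucene.NET idiom for opening several inputs at once: the happy path disposes them normally, while a failure part-way through still cleans up whatever was opened without masking the original exception. A stripped-down sketch of the idiom (the file names are placeholders):

using Lucene.Net.Store;
using Lucene.Net.Util;

public static class OpenPatternSketch
{
    public static void OpenBoth(Directory dir, IOContext context)
    {
        IndexInput data = null;
        IndexInput index = null;
        bool success = false;
        try
        {
            data = dir.OpenInput("demo.dat", context);
            index = dir.OpenInput("demo.idx", context); // if this throws, data still gets cleaned up below
            // ... check headers and build the in-memory structure here ...
            success = true;
        }
        finally
        {
            if (success)
            {
                IOUtils.Dispose(data, index); // normal path: dispose and propagate any failure
            }
            else
            {
                IOUtils.DisposeWhileHandlingException(data, index); // failure path: don't mask the original exception
            }
        }
    }
}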
private BinaryDocValues LoadBinary(FieldInfo field) { BinaryEntry entry = binaries[field.Number]; data.Seek(entry.Offset); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, entry.NumBytes); PagedBytes.Reader bytesReader = bytes.Freeze(true); if (entry.MinLength == entry.MaxLength) { int fixedLength = entry.MinLength; ramBytesUsed.AddAndGet(bytes.RamBytesUsed()); return(new BinaryDocValuesAnonymousInnerClassHelper(bytesReader, fixedLength)); } else { MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, maxDoc, false); ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addresses.RamBytesUsed()); return(new BinaryDocValuesAnonymousInnerClassHelper2(bytesReader, addresses)); } }
private BinaryDocValues LoadBytesVarDeref(FieldInfo field) { string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx"); IndexInput data = null; IndexInput index = null; bool success = false; try { data = Dir.OpenInput(dataName, State.Context); CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT); index = Dir.OpenInput(indexName, State.Context); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT); long totalBytes = index.ReadLong(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, totalBytes); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInts.Reader reader = PackedInts.GetReader(index); CodecUtil.CheckEOF(data); CodecUtil.CheckEOF(index); RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed()); success = true; return(new BinaryDocValuesAnonymousInnerClassHelper4(this, bytesReader, reader)); } finally { if (success) { IOUtils.Close(data, index); } else { IOUtils.CloseWhileHandlingException(data, index); } } }
public BinaryDocValuesAnonymousInnerClassHelper2(Lucene42DocValuesProducer outerInstance, PagedBytes.Reader bytesReader, MonotonicBlockPackedReader addresses) { this.OuterInstance = outerInstance; this.BytesReader = bytesReader; this.Addresses = addresses; }
/// <summary>
/// Call this only once (if you subclass!)
/// </summary>
protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
{
    FieldInfo info = reader.FieldInfos.FieldInfo(m_field);
    if (info != null && info.HasDocValues)
    {
        throw new InvalidOperationException("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    long startTime = Environment.TickCount;
    m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

    int maxDoc = reader.MaxDoc;
    int[] index = new int[maxDoc];    // immediate term numbers, or the index into the byte[] representing the last number
    int[] lastTerm = new int[maxDoc]; // last term we saw for this document
    var bytes = new sbyte[maxDoc][];  // list of term numbers for the doc (delta encoded vInts)

    Fields fields = reader.Fields;
    if (fields == null)
    {
        return; // No terms
    }
    Terms terms = fields.GetTerms(m_field);
    if (terms == null)
    {
        return; // No terms
    }

    TermsEnum te = terms.GetIterator(null);
    BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
    {
        return; // No terms match
    }

    // If we need our "term index wrapper", these will be init'd below:
    IList<BytesRef> indexedTerms = null;
    PagedBytes indexedTermsBytes = null;

    bool testedOrd = false;

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    var tempArr = new sbyte[12];

    // Enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values. this requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    m_docsEnum = null;

    // Loop begins with te positioned to first term (we call seek above):
    for (; ; )
    {
        BytesRef t = te.Term;
        if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
        {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
        if (!testedOrd)
        {
            try
            {
                m_ordBase = (int)te.Ord;
                //System.out.println("got ordBase=" + ordBase);
            }
#pragma warning disable 168
            catch (NotSupportedException uoe)
#pragma warning restore 168
            {
                // Reader cannot provide ord support, so we wrap
                // our own support by creating our own terms index:
                indexedTerms = new List<BytesRef>();
                indexedTermsBytes = new PagedBytes(15);
                //System.out.println("NO ORDS");
            }
            testedOrd = true;
        }

        VisitTerm(te, termNum);

        if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
        {
            // Index this term
            m_sizeOfIndexedStrings += t.Length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.Copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.Add(indexedTerm);
        }

        int df = te.DocFreq;
        if (df <= m_maxTermDocFreq)
        {
            m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (; ; )
            {
                int doc = m_docsEnum.NextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                actualDF++;
                m_termInstances++;
                //System.out.println("  docID=" + doc);

                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1)
                {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = (int)((uint)val >> 8);
                    int ilen = VInt32Size(delta);
                    var arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.Length)
                    {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & unchecked((int)0xfffffffc); // 4 byte alignment
                        var newarr = new sbyte[newLen];
                        Array.Copy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = WriteInt32(delta, arr, pos);
                    index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                }
                else
                {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0)
                    {
                        ipos = 0;
                    }
                    else if ((val & 0x0000ff80) == 0)
                    {
                        ipos = 1;
                    }
                    else if ((val & 0x00ff8000) == 0)
                    {
                        ipos = 2;
                    }
                    else if ((val & 0xff800000) == 0)
                    {
                        ipos = 3;
                    }
                    else
                    {
                        ipos = 4;
                    }
                    //System.out.println("  ipos=" + ipos);

                    int endPos = WriteInt32(delta, tempArr, ipos);
                    //System.out.println("  endpos=" + endPos);
                    if (endPos <= 4)
                    {
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++)
                        {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    }
                    else
                    {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++)
                        {
                            tempArr[j] = (sbyte)val;
                            val = (int)((uint)val >> 8);
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new sbyte[12];
                    }
                }
            }
            SetActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (te.Next() == null)
        {
            break;
        }
    }

    m_numTermsInField = termNum;

    long midPoint = Environment.TickCount;

    if (m_termInstances == 0)
    {
        // we didn't invert anything; lower memory consumption.
        m_tnums = null;
    }
    else
    {
        this.m_index = index;

        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        for (int pass = 0; pass < 256; pass++)
        {
            var target = m_tnums[pass];
            var pos = 0; // end in target
            if (target != null)
            {
                pos = target.Length;
            }
            else
            {
                target = new sbyte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
            {
                int lim = Math.Min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++)
                {
                    //System.out.println("  pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1)
                    {
                        int len = (int)((uint)val >> 8);
                        //System.out.println("    ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0)
                        {
                            // we only have 24 bits for the array index
                            throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + m_field);
                        }
                        var arr = bytes[doc];
                        bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                        if (target.Length <= pos + len)
                        {
                            int newlen = target.Length;
                            // we don't have to worry about the array getting too large
                            // since the "pos" param will overflow first (only 24 bits available)
                            while (newlen <= pos + len) // doubling strategy
                            {
                                newlen <<= 1;
                            }
                            var newtarget = new sbyte[newlen];
                            Array.Copy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        Array.Copy(arr, 0, target, pos, len);
                        pos += len + 1; // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.Length)
            {
                var newtarget = new sbyte[pos];
                Array.Copy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            m_tnums[pass] = target;

            if ((pass << 16) > maxDoc)
            {
                break;
            }
        }
    }

    if (indexedTerms != null)
    {
        m_indexedTermsArray = new BytesRef[indexedTerms.Count];
        indexedTerms.CopyTo(m_indexedTermsArray, 0);
    }

    long endTime = Environment.TickCount;

    m_total_time = (int)(endTime - startTime);
    m_phase1_time = (int)(midPoint - startTime);
}
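The deltas written by WriteInt32 above use a vInt layout that stores the most significant 7-bit groups first and sets the high bit on every byte except the last, so a zero byte can only be a terminator, never part of a value. The helpers below re-derive that encoding for illustration; they mirror the calls in Uninvert but are reconstructed, not copied from the Lucene.NET source:

using System;

public static class VIntSketch
{
    // Number of bytes WriteInt32 will emit for x.
    public static int VInt32Size(int x)
    {
        if ((x & (-1 << 7)) == 0) return 1;
        if ((x & (-1 << 14)) == 0) return 2;
        if ((x & (-1 << 21)) == 0) return 3;
        if ((x & (-1 << 28)) == 0) return 4;
        return 5;
    }

    // Most significant 7-bit groups first; every byte except the last sets
    // the high bit, matching the "end detection" logic in Uninvert.
    public static int WriteInt32(int x, sbyte[] arr, int pos)
    {
        int a = (int)((uint)x >> (7 * 4));
        if (a != 0) arr[pos++] = unchecked((sbyte)(a | 0x80));
        a = (int)((uint)x >> (7 * 3));
        if (a != 0) arr[pos++] = unchecked((sbyte)(a | 0x80));
        a = (int)((uint)x >> (7 * 2));
        if (a != 0) arr[pos++] = unchecked((sbyte)(a | 0x80));
        a = (int)((uint)x >> 7);
        if (a != 0) arr[pos++] = unchecked((sbyte)(a | 0x80));
        arr[pos++] = (sbyte)(x & 0x7f);
        return pos;
    }

    public static void Main()
    {
        var buf = new sbyte[5];
        int end = WriteInt32(300, buf, 0); // 300 needs two bytes: 0x82, 0x2C
        Console.WriteLine(end);            // 2
        Console.WriteLine(VInt32Size(300)); // 2
    }
}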
public BinaryDocValuesAnonymousInnerClassHelper(PagedBytes.Reader bytesReader, int fixedLength) { this.BytesReader = bytesReader; this.FixedLength = fixedLength; }
public BinaryDocValuesAnonymousInnerClassHelper(Lucene42DocValuesProducer outerInstance, PagedBytes.Reader bytesReader, int fixedLength) { this.OuterInstance = outerInstance; this.BytesReader = bytesReader; this.FixedLength = fixedLength; }
private SortedDocValues LoadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) { CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); int fixedLength = data.ReadInt(); int valueCount = index.ReadInt(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, fixedLength * (long)valueCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInts.Reader reader = PackedInts.GetReader(index); RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed()); return CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper(this, fixedLength, valueCount, bytesReader, reader)); }
public SortedDocValuesAnonymousInnerClassHelper(Lucene40DocValuesReader outerInstance, int fixedLength, int valueCount, PagedBytes.Reader bytesReader, PackedInts.Reader reader) { this.OuterInstance = outerInstance; this.FixedLength = fixedLength; this.valueCount = valueCount; this.BytesReader = bytesReader; this.Reader = reader; }
/// <summary>
/// Call this only once (if you subclass!)
/// </summary>
protected internal virtual void Uninvert(AtomicReader reader, Bits liveDocs, BytesRef termPrefix)
{
    FieldInfo info = reader.FieldInfos.FieldInfo(Field);
    if (info != null && info.HasDocValues())
    {
        throw new InvalidOperationException("Type mismatch: " + Field + " was indexed as " + info.DocValuesType);
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    long startTime = Environment.TickCount;
    Prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

    int maxDoc = reader.MaxDoc;
    int[] index = new int[maxDoc];         // immediate term numbers, or the index into the byte[] representing the last number
    int[] lastTerm = new int[maxDoc];      // last term we saw for this document
    sbyte[][] bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    Fields fields = reader.Fields;
    if (fields == null)
    {
        return; // No terms
    }
    Terms terms = fields.Terms(Field);
    if (terms == null)
    {
        return; // No terms
    }

    TermsEnum te = terms.Iterator(null);
    BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
    {
        return; // No terms match
    }

    // If we need our "term index wrapper", these will be init'd below:
    IList<BytesRef> indexedTerms = null;
    PagedBytes indexedTermsBytes = null;

    bool testedOrd = false;

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    sbyte[] tempArr = new sbyte[12];

    // Enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values. this requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    DocsEnum = null;

    // Loop begins with te positioned to first term (we call seek above):
    for (; ; )
    {
        BytesRef t = te.Term();
        if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
        {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
        if (!testedOrd)
        {
            try
            {
                OrdBase = (int)te.Ord();
                //System.out.println("got ordBase=" + ordBase);
            }
            catch (System.NotSupportedException)
            {
                // Reader cannot provide ord support, so we wrap
                // our own support by creating our own terms index:
                indexedTerms = new List<BytesRef>();
                indexedTermsBytes = new PagedBytes(15);
                //System.out.println("NO ORDS");
            }
            testedOrd = true;
        }

        VisitTerm(te, termNum);

        if (indexedTerms != null && (termNum & IndexIntervalMask) == 0)
        {
            // Index this term
            SizeOfIndexedStrings += t.Length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.Copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.Add(indexedTerm);
        }

        int df = te.DocFreq();
        if (df <= MaxTermDocFreq)
        {
            DocsEnum = te.Docs(liveDocs, DocsEnum, DocsEnum.FLAG_NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (; ; )
            {
                int doc = DocsEnum.NextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                actualDF++;
                TermInstances++;
                //System.out.println("  docID=" + doc);

                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1)
                {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = (int)((uint)val >> 8);
                    int ilen = VIntSize(delta);
                    sbyte[] arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.Length)
                    {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & unchecked((int)0xfffffffc); // 4 byte alignment
                        sbyte[] newarr = new sbyte[newLen];
                        Array.Copy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = WriteInt(delta, arr, pos);
                    index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                }
                else
                {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0)
                    {
                        ipos = 0;
                    }
                    else if ((val & 0x0000ff80) == 0)
                    {
                        ipos = 1;
                    }
                    else if ((val & 0x00ff8000) == 0)
                    {
                        ipos = 2;
                    }
                    else if ((val & 0xff800000) == 0)
                    {
                        ipos = 3;
                    }
                    else
                    {
                        ipos = 4;
                    }
                    //System.out.println("  ipos=" + ipos);

                    int endPos = WriteInt(delta, tempArr, ipos);
                    //System.out.println("  endpos=" + endPos);
                    if (endPos <= 4)
                    {
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++)
                        {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    }
                    else
                    {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++)
                        {
                            tempArr[j] = (sbyte)val;
                            val = (int)((uint)val >> 8);
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new sbyte[12];
                    }
                }
            }
            SetActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (te.Next() == null)
        {
            break;
        }
    }

    NumTermsInField = termNum;

    long midPoint = Environment.TickCount;

    if (TermInstances == 0)
    {
        // we didn't invert anything; lower memory consumption.
        Tnums = null;
    }
    else
    {
        this.Index = index;

        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        for (int pass = 0; pass < 256; pass++)
        {
            sbyte[] target = Tnums[pass];
            int pos = 0; // end in target
            if (target != null)
            {
                pos = target.Length;
            }
            else
            {
                target = new sbyte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
            {
                int lim = Math.Min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++)
                {
                    //System.out.println("  pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1)
                    {
                        int len = (int)((uint)val >> 8);
                        //System.out.println("    ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0)
                        {
                            // we only have 24 bits for the array index
                            throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + Field);
                        }
                        sbyte[] arr = bytes[doc];
                        bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                        if (target.Length <= pos + len)
                        {
                            int newlen = target.Length;
                            // we don't have to worry about the array getting too large
                            // since the "pos" param will overflow first (only 24 bits available)
                            while (newlen <= pos + len) // doubling strategy
                            {
                                newlen <<= 1;
                            }
                            sbyte[] newtarget = new sbyte[newlen];
                            Array.Copy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        Array.Copy(arr, 0, target, pos, len);
                        pos += len + 1; // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.Length)
            {
                sbyte[] newtarget = new sbyte[pos];
                Array.Copy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            Tnums[pass] = target;

            if ((pass << 16) > maxDoc)
            {
                break;
            }
        }
    }

    if (indexedTerms != null)
    {
        IndexedTermsArray = indexedTerms.ToArray();
    }

    long endTime = Environment.TickCount;

    Total_time = (int)(endTime - startTime);
    Phase1_time = (int)(midPoint - startTime);
}
public BinaryDocValuesAnonymousInnerClassHelper2(PagedBytes.Reader bytesReader, MonotonicBlockPackedReader addresses) { this.BytesReader = bytesReader; this.Addresses = addresses; }
private BinaryDocValues LoadBytesFixedStraight(FieldInfo field) { string fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat"); IndexInput input = Dir.OpenInput(fileName, State.Context); bool success = false; try { CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT); int fixedLength = input.ReadInt(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(input, fixedLength * (long)State.SegmentInfo.DocCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); CodecUtil.CheckEOF(input); success = true; RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed()); return new BinaryDocValuesAnonymousInnerClassHelper(this, fixedLength, bytesReader); } finally { if (success) { IOUtils.Close(input); } else { IOUtils.CloseWhileHandlingException(input); } } }
public BinaryDocValuesAnonymousInnerClassHelper4(Lucene40DocValuesReader outerInstance, PagedBytes.Reader bytesReader, PackedInts.Reader reader) { this.OuterInstance = outerInstance; this.BytesReader = bytesReader; this.Reader = reader; }
private BinaryDocValues LoadBinary(FieldInfo field) { BinaryEntry entry = Binaries[field.Number]; Data.Seek(entry.Offset); PagedBytes bytes = new PagedBytes(16); bytes.Copy(Data, entry.NumBytes); PagedBytes.Reader bytesReader = bytes.Freeze(true); if (entry.MinLength == entry.MaxLength) { int fixedLength = entry.MinLength; RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed()); return new BinaryDocValuesAnonymousInnerClassHelper(this, bytesReader, fixedLength); } else { MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(Data, entry.PackedIntsVersion, entry.BlockSize, MaxDoc, false); RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + addresses.RamBytesUsed()); return new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, addresses); } }
public SortedDocValuesAnonymousInnerClassHelper2(Lucene40DocValuesReader outerInstance, PagedBytes.Reader bytesReader, PackedInts.Reader addressReader, PackedInts.Reader ordsReader, int valueCount) { this.OuterInstance = outerInstance; this.BytesReader = bytesReader; this.AddressReader = addressReader; this.OrdsReader = ordsReader; this.valueCount = valueCount; }