/// <summary>
/// Allocates internal skip buffers.
/// </summary>
protected virtual void Init()
{
    skipBuffer = new RAMOutputStream[m_numberOfSkipLevels];
    for (int i = 0; i < m_numberOfSkipLevels; i++)
    {
        skipBuffer[i] = new RAMOutputStream();
    }
}
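For context, each of these per-level skip buffers holds its data in memory until the skip list is flushed to the real output. A minimal sketch of how such buffers are typically drained, assuming only the members shown above plus RAMOutputStream.Length and WriteTo(DataOutput), which other snippets in this section also use; the FlushSkipLevels helper itself is hypothetical, not the actual writer API:

// Hypothetical helper: drain each level's in-memory buffer into the on-disk output,
// highest level first, recording how many bytes each level occupies so a reader can
// skip between levels.
private void FlushSkipLevels(IndexOutput output)
{
    for (int level = m_numberOfSkipLevels - 1; level > 0; level--)
    {
        long length = skipBuffer[level].Length;  // bytes buffered for this level
        if (length > 0)
        {
            output.WriteVLong(length);           // length header for locating the next level
            skipBuffer[level].WriteTo(output);   // copy the buffered bytes to the real output
        }
    }
    skipBuffer[0].WriteTo(output);               // lowest level is written last, without a header
}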
internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets)
    : base(@in)
{
    this.maxDoc = maxDoc;
    this.storeOffsets = storeOffsets;
    if (reuse != null)
    {
        docs = reuse.docs;
        offsets = reuse.offsets;
        payload = reuse.payload;
        file = reuse.file;
        if (reuse.maxDoc == maxDoc)
        {
            sorter = reuse.sorter;
        }
        else
        {
            sorter = new DocOffsetSorter(maxDoc);
        }
    }
    else
    {
        docs = new int[32];
        offsets = new long[32];
        payload = new BytesRef(32);
        file = new RAMFile();
        sorter = new DocOffsetSorter(maxDoc);
    }
    using (IndexOutput @out = new RAMOutputStream(file))
    {
        int doc;
        int i = 0;
        while ((doc = @in.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (i == docs.Length)
            {
                int newLength = ArrayUtil.Oversize(i + 1, 4);
                docs = Arrays.CopyOf(docs, newLength);
                offsets = Arrays.CopyOf(offsets, newLength);
            }
            docs[i] = docMap.OldToNew(doc);
            offsets[i] = @out.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            AddPositions(@in, @out);
            i++;
        }
        upto = i;
        sorter.Reset(docs, offsets);
        sorter.Sort(0, upto);
    }
    this.postingInput = new RAMInputStream("", file);
}
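The constructor above shows the round trip that several of these call sites rely on: postings are buffered through a RAMOutputStream backed by a RAMFile, then re-read through a RAMInputStream over the same file. A minimal standalone sketch of that pattern, with illustrative byte values only:

var file = new RAMFile();
using (IndexOutput @out = new RAMOutputStream(file))
{
    @out.WriteBytes(new byte[] { 1, 2, 3 }, 0, 3);  // buffer some data in memory
} // disposing the output flushes its final buffer into the RAMFile

IndexInput @in = new RAMInputStream("", file);      // read back what was buffered
var scratch = new byte[3];
@in.ReadBytes(scratch, 0, 3);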
/// <summary>
/// Expert: writes a value dictionary for a sorted/sortedset field.
/// </summary>
protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
{
    // first check if it's a "fixed-length" terms dict
    int minLength = int.MaxValue;
    int maxLength = int.MinValue;
    foreach (BytesRef v in values)
    {
        minLength = Math.Min(minLength, v.Length);
        maxLength = Math.Max(maxLength, v.Length);
    }
    if (minLength == maxLength)
    {
        // no index needed: direct addressing by mult
        AddBinaryField(field, values);
    }
    else
    {
        // header
        Meta.WriteVInt(field.Number);
        Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
        Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
        Meta.WriteLong(-1L);
        // now write the bytes: sharing prefixes within a block
        long startFP = Data.FilePointer;
        // currently, we have to store the delta from expected for every 1/nth term
        // we could avoid this, but it's not much and less overall RAM than the previous approach!
        RAMOutputStream addressBuffer = new RAMOutputStream();
        MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE);
        BytesRef lastTerm = new BytesRef();
        long count = 0;
        foreach (BytesRef v in values)
        {
            if (count % ADDRESS_INTERVAL == 0)
            {
                termAddresses.Add(Data.FilePointer - startFP);
                // force the first term in a block to be abs-encoded
                lastTerm.Length = 0;
            }
            // prefix-code
            int sharedPrefix = StringHelper.BytesDifference(lastTerm, v);
            Data.WriteVInt(sharedPrefix);
            Data.WriteVInt(v.Length - sharedPrefix);
            Data.WriteBytes(v.Bytes, v.Offset + sharedPrefix, v.Length - sharedPrefix);
            lastTerm.CopyBytes(v);
            count++;
        }
        long indexStartFP = Data.FilePointer;
        // write addresses of indexed terms
        termAddresses.Finish();
        addressBuffer.WriteTo(Data);
        addressBuffer = null;
        termAddresses = null;
        Meta.WriteVInt(minLength);
        Meta.WriteVInt(maxLength);
        Meta.WriteVLong(count);
        Meta.WriteLong(startFP);
        Meta.WriteVInt(ADDRESS_INTERVAL);
        Meta.WriteLong(indexStartFP);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Meta.WriteVInt(BLOCK_SIZE);
    }
}
internal virtual void InitializeInstanceFields()
{
    output = new RAMOutputStream(buffer);
}
public Builder()
{
    output = new RAMOutputStream(buffer);
}
public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
{
    FieldInfo fieldInfo = state.FieldInfos.FieldInfo(field);
    sumTotalTermFreq = termsIn.SumTotalTermFreq;
    sumDocFreq = termsIn.SumDocFreq;
    docCount = termsIn.DocCount;
    int numTerms = (int)termsIn.Size();
    if (numTerms == -1)
    {
        throw new System.ArgumentException("codec does not provide Terms.size()");
    }
    terms = new TermAndSkip[numTerms];
    termOffsets = new int[1 + numTerms];
    byte[] termBytes = new byte[1024];
    this.minSkipCount = minSkipCount;
    hasFreq = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_ONLY) > 0;
    hasPos = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS) > 0;
    hasOffsets_Renamed = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
    hasPayloads_Renamed = fieldInfo.HasPayloads();
    BytesRef term;
    DocsEnum docsEnum = null;
    DocsAndPositionsEnum docsAndPositionsEnum = null;
    TermsEnum termsEnum = termsIn.Iterator(null);
    int termOffset = 0;
    IntArrayWriter scratch = new IntArrayWriter();
    // Used for payloads, if any:
    RAMOutputStream ros = new RAMOutputStream();
    // if (DEBUG) {
    //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
    // }
    while ((term = termsEnum.Next()) != null)
    {
        int docFreq = termsEnum.DocFreq();
        long totalTermFreq = termsEnum.TotalTermFreq();
        // if (DEBUG) {
        //   System.out.println("  term=" + term.utf8ToString());
        // }
        termOffsets[count] = termOffset;
        if (termBytes.Length < (termOffset + term.Length))
        {
            termBytes = ArrayUtil.Grow(termBytes, termOffset + term.Length);
        }
        Array.Copy(term.Bytes, term.Offset, termBytes, termOffset, term.Length);
        termOffset += term.Length;
        termOffsets[count + 1] = termOffset;
        if (hasPos)
        {
            docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
        }
        else
        {
            docsEnum = termsEnum.Docs(null, docsEnum);
        }
        TermAndSkip ent;
        DocsEnum docsEnum2;
        docsEnum2 = hasPos ? docsAndPositionsEnum : docsEnum;
        int docID;
        if (docFreq <= lowFreqCutoff)
        {
            ros.Reset();
            // Pack postings for low-freq terms into a single int[]:
            while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                scratch.Add(docID);
                if (hasFreq)
                {
                    int freq = docsEnum2.Freq();
                    scratch.Add(freq);
                    if (hasPos)
                    {
                        for (int pos = 0; pos < freq; pos++)
                        {
                            scratch.Add(docsAndPositionsEnum.NextPosition());
                            if (hasOffsets_Renamed)
                            {
                                scratch.Add(docsAndPositionsEnum.StartOffset());
                                scratch.Add(docsAndPositionsEnum.EndOffset());
                            }
                            if (hasPayloads_Renamed)
                            {
                                BytesRef payload = docsAndPositionsEnum.Payload;
                                if (payload != null)
                                {
                                    scratch.Add(payload.Length);
                                    ros.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                                }
                                else
                                {
                                    scratch.Add(0);
                                }
                            }
                        }
                    }
                }
            }
            byte[] payloads;
            if (hasPayloads_Renamed)
            {
                ros.Flush();
                payloads = new byte[(int)ros.Length];
                ros.WriteTo(payloads, 0);
            }
            else
            {
                payloads = null;
            }
            int[] postings = scratch.Get();
            ent = new LowFreqTerm(postings, payloads, docFreq, (int)totalTermFreq);
        }
        else
        {
            var docs = new int[docFreq];
            int[] freqs;
            int[][] positions;
            byte[][][] payloads;
            if (hasFreq)
            {
                freqs = new int[docFreq];
                if (hasPos)
                {
                    positions = new int[docFreq][];
                    if (hasPayloads_Renamed)
                    {
                        payloads = new byte[docFreq][][];
                    }
                    else
                    {
                        payloads = null;
                    }
                }
                else
                {
                    positions = null;
                    payloads = null;
                }
            }
            else
            {
                freqs = null;
                positions = null;
                payloads = null;
            }
            // Use separate int[] for the postings for high-freq terms:
            int upto = 0;
            while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                docs[upto] = docID;
                if (hasFreq)
                {
                    int freq = docsEnum2.Freq();
                    freqs[upto] = freq;
                    if (hasPos)
                    {
                        int mult;
                        if (hasOffsets_Renamed)
                        {
                            mult = 3;
                        }
                        else
                        {
                            mult = 1;
                        }
                        if (hasPayloads_Renamed)
                        {
                            payloads[upto] = new byte[freq][];
                        }
                        positions[upto] = new int[mult * freq];
                        int posUpto = 0;
                        for (int pos = 0; pos < freq; pos++)
                        {
                            positions[upto][posUpto] = docsAndPositionsEnum.NextPosition();
                            if (hasPayloads_Renamed)
                            {
                                BytesRef payload = docsAndPositionsEnum.Payload;
                                if (payload != null)
                                {
                                    var payloadBytes = new byte[payload.Length];
                                    Array.Copy(payload.Bytes, payload.Offset, payloadBytes, 0, payload.Length);
                                    payloads[upto][pos] = payloadBytes;
                                }
                            }
                            posUpto++;
                            if (hasOffsets_Renamed)
                            {
                                positions[upto][posUpto++] = docsAndPositionsEnum.StartOffset();
                                positions[upto][posUpto++] = docsAndPositionsEnum.EndOffset();
                            }
                        }
                    }
                }
                upto++;
            }
            Debug.Assert(upto == docFreq);
            ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
        }
        terms[count] = ent;
        SetSkips(count, termBytes);
        count++;
    }
    // End sentinel:
    termOffsets[count] = termOffset;
    FinishSkips();
    //System.out.println(skipCount + " skips: " + field);
    this.termBytes = new byte[termOffset];
    Array.Copy(termBytes, 0, this.termBytes, 0, termOffset);
    // Pack skips:
    this.skips = new int[skipCount];
    this.skipOffsets = new int[1 + numTerms];
    int skipOffset = 0;
    for (int i = 0; i < numTerms; i++)
    {
        int[] termSkips = terms[i].skips;
        skipOffsets[i] = skipOffset;
        if (termSkips != null)
        {
            Array.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
            skipOffset += termSkips.Length;
            terms[i].skips = null;
        }
    }
    this.skipOffsets[numTerms] = skipOffset;
    Debug.Assert(skipOffset == skipCount);
}
// Writes the contents of buffer into the fields stream
// and adds a new entry for this document into the index
// stream. This assumes the buffer was already written
// in the correct fields format.
internal void FlushDocument(int numStoredFields, RAMOutputStream buffer)
{
    indexStream.WriteLong(fieldsStream.FilePointer);
    fieldsStream.WriteVInt(numStoredFields);
    buffer.WriteTo(fieldsStream);
}
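FlushDocument assumes the caller has already serialized one document's stored fields into an in-memory buffer. A minimal sketch of that calling pattern; the field data written here is illustrative only and not the exact on-disk stored-fields encoding:

RAMOutputStream buffer = new RAMOutputStream();
buffer.WriteVInt(0);                      // illustrative field number
buffer.WriteString("some stored value");  // illustrative field value
FlushDocument(1, buffer);                 // copies the buffered bytes into fieldsStream and records the doc in indexStream
buffer.Reset();                           // the same in-memory buffer can be reused for the next document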
private void InitBlock(TermVectorsTermsWriter enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
    buffer = enclosingInstance.docWriter.NewPerDocBuffer();
    perDocTvf = new RAMOutputStream(buffer);
}
private void InitBlock(StoredFieldsWriter enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
    buffer = enclosingInstance.docWriter.NewPerDocBuffer();
    fdt = new RAMOutputStream(buffer);
}