Example #1
 /// <summary>
 /// Allocates internal skip buffers. </summary>
 protected internal virtual void Init()
 {
     SkipBuffer = new RAMOutputStream[NumberOfSkipLevels];
     for (int i = 0; i < NumberOfSkipLevels; i++)
     {
         SkipBuffer[i] = new RAMOutputStream();
     }
 }
Example #2
 /// <summary>
 /// Allocates internal skip buffers. </summary>
 protected virtual void Init()
 {
     skipBuffer = new RAMOutputStream[m_numberOfSkipLevels];
     for (int i = 0; i < m_numberOfSkipLevels; i++)
     {
         skipBuffer[i] = new RAMOutputStream();
     }
 }
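Both variants do the same thing: allocate one in-memory RAMOutputStream per skip level so each level can be filled independently and copied into the real skip stream later. A minimal sketch of that fill-copy-reset cycle, using only calls that appear elsewhere on this page (WriteVInt, WriteTo, Reset); the method name and level count are illustrative, not from the source:

 // Illustrative sketch: fill per-level buffers in memory, then append each
 // one to the real on-disk output and recycle it for the next round.
 internal static void WriteSkipLevels(IndexOutput realOutput)
 {
     var skipBuffer = new RAMOutputStream[3]; // 3 levels, chosen arbitrarily
     for (int i = 0; i < skipBuffer.Length; i++)
     {
         skipBuffer[i] = new RAMOutputStream();
     }

     skipBuffer[0].WriteVInt(42); // buffered entirely in memory, no file I/O

     for (int level = skipBuffer.Length - 1; level >= 0; level--)
     {
         skipBuffer[level].WriteTo(realOutput); // copy buffered bytes out
         skipBuffer[level].Reset();             // buffer is reusable afterwards
     }
 }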
Example #3
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, final org.apache.lucene.index.DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
            internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets) : base(@in)
            {
                this.maxDoc       = maxDoc;
                this.storeOffsets = storeOffsets;
                if (reuse != null)
                {
                    docs    = reuse.docs;
                    offsets = reuse.offsets;
                    payload = reuse.payload;
                    file    = reuse.file;
                    if (reuse.maxDoc == maxDoc)
                    {
                        sorter = reuse.sorter;
                    }
                    else
                    {
                        sorter = new DocOffsetSorter(maxDoc);
                    }
                }
                else
                {
                    docs    = new int[32];
                    offsets = new long[32];
                    payload = new BytesRef(32);
                    file    = new RAMFile();
                    sorter  = new DocOffsetSorter(maxDoc);
                }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.store.IndexOutput out = new org.apache.lucene.store.RAMOutputStream(file);
                IndexOutput @out = new RAMOutputStream(file);
                int         doc;
                int         i = 0;

                while ((doc = @in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (i == docs.Length)
                    {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int newLength = org.apache.lucene.util.ArrayUtil.oversize(i + 1, 4);
                        int newLength = ArrayUtil.oversize(i + 1, 4);
                        docs    = Arrays.copyOf(docs, newLength);
                        offsets = Arrays.copyOf(offsets, newLength);
                    }
                    docs[i]    = docMap.oldToNew(doc);
                    offsets[i] = @out.FilePointer;
                    addPositions(@in, @out);
                    i++;
                }
                upto = i;
                sorter.reset(docs, offsets);
                sorter.sort(0, upto);
                @out.close();
                this.postingInput = new RAMInputStream("", file);
            }
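Example #3 is raw output from the Java-to-C# converter: the warning comments are the tool's, and the Java casing (nextDoc, oldToNew, @out.close()) and Arrays.copyOf calls survive unchanged, so this version will not compile as-is. Example #4 below is the same constructor after cleanup: the calls are .NET-cased, the output stream's lifetime is handled by a using block instead of an explicit close(), and FilePointer has become Position per the LUCENENET rename noted in its comment.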
Example #4
 internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets)
     : base(@in)
 {
     this.maxDoc       = maxDoc;
     this.storeOffsets = storeOffsets;
     if (reuse != null)
     {
         docs    = reuse.docs;
         offsets = reuse.offsets;
         payload = reuse.payload;
         file    = reuse.file;
         if (reuse.maxDoc == maxDoc)
         {
             sorter = reuse.sorter;
         }
         else
         {
             sorter = new DocOffsetSorter(maxDoc);
         }
     }
     else
     {
         docs    = new int[32];
         offsets = new long[32];
         payload = new BytesRef(32);
         file    = new RAMFile();
         sorter  = new DocOffsetSorter(maxDoc);
     }
     using (IndexOutput @out = new RAMOutputStream(file))
     {
         int doc;
         int i = 0;
         while ((doc = @in.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
         {
             if (i == docs.Length)
             {
                 int newLength = ArrayUtil.Oversize(i + 1, 4);
                 docs    = Arrays.CopyOf(docs, newLength);
                 offsets = Arrays.CopyOf(offsets, newLength);
             }
             docs[i]    = docMap.OldToNew(doc);
             offsets[i] = @out.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
             AddPositions(@in, @out);
             i++;
         }
         upto = i;
         sorter.Reset(docs, offsets);
         sorter.Sort(0, upto);
     }
     this.postingInput = new RAMInputStream("", file);
 }
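The shape of Example #4 is buffer-then-replay: postings are written through a RAMOutputStream into a RAMFile (the using block guarantees the final flush on dispose), and the same RAMFile is then re-read through a RAMInputStream. Reduced to its essentials, under the same API the example uses (the method itself is illustrative):

 // Minimal round-trip sketch: bytes out through RAMOutputStream,
 // bytes back in through RAMInputStream over the shared RAMFile.
 internal static byte[] RoundTrip(byte[] payload)
 {
     var file = new RAMFile();
     using (IndexOutput @out = new RAMOutputStream(file))
     {
         @out.WriteBytes(payload, 0, payload.Length);
     } // Dispose flushes the buffered bytes into `file`

     var result = new byte[payload.Length];
     using (IndexInput @in = new RAMInputStream("", file))
     {
         @in.ReadBytes(result, 0, result.Length);
     }
     return result;
 }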
Example #5
        /// <summary>
        /// expert: writes a value dictionary for a sorted/sortedset field </summary>
        protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // first check if it's a "fixed-length" terms dict
            int minLength = int.MaxValue;
            int maxLength = int.MinValue;
            foreach (BytesRef v in values)
            {
                minLength = Math.Min(minLength, v.Length);
                maxLength = Math.Max(maxLength, v.Length);
            }
            if (minLength == maxLength)
            {
                // no index needed: direct addressing by mult
                AddBinaryField(field, values);
            }
            else
            {
                // header
                Meta.WriteVInt(field.Number);
                Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
                Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
                Meta.WriteLong(-1L);
                // now write the bytes: sharing prefixes within a block
                long startFP = Data.FilePointer;
                // currently, we have to store the delta from expected for every 1/nth term
                // we could avoid this, but it's not much and less overall RAM than the previous approach!
                RAMOutputStream addressBuffer = new RAMOutputStream();
                MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE);
                BytesRef lastTerm = new BytesRef();
                long count = 0;
                foreach (BytesRef v in values)
                {
                    if (count % ADDRESS_INTERVAL == 0)
                    {
                        termAddresses.Add(Data.FilePointer - startFP);
                        // force the first term in a block to be abs-encoded
                        lastTerm.Length = 0;
                    }

                    // prefix-code
                    int sharedPrefix = StringHelper.BytesDifference(lastTerm, v);
                    Data.WriteVInt(sharedPrefix);
                    Data.WriteVInt(v.Length - sharedPrefix);
                    Data.WriteBytes(v.Bytes, v.Offset + sharedPrefix, v.Length - sharedPrefix);
                    lastTerm.CopyBytes(v);
                    count++;
                }
                long indexStartFP = Data.FilePointer;
                // write addresses of indexed terms
                termAddresses.Finish();
                addressBuffer.WriteTo(Data);
                addressBuffer = null;
                termAddresses = null;
                Meta.WriteVInt(minLength);
                Meta.WriteVInt(maxLength);
                Meta.WriteVLong(count);
                Meta.WriteLong(startFP);
                Meta.WriteVInt(ADDRESS_INTERVAL);
                Meta.WriteLong(indexStartFP);
                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Meta.WriteVInt(BLOCK_SIZE);
            }
        }
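The detail worth noting in Example #5 is the side buffer: the term addresses cannot be interleaved into Data while the terms themselves are streaming to it, so they accumulate in an in-memory RAMOutputStream and are appended in one addressBuffer.WriteTo(Data) call once the term block is complete, with Meta recording indexStartFP so a reader can locate them. The same defer-and-append move in isolation (names illustrative):

 // Illustrative sketch: defer a dependent block by buffering it in memory,
 // then append it to the main output once that output is ready for it.
 internal static long AppendDeferredBlock(IndexOutput data)
 {
     var side = new RAMOutputStream();
     side.WriteVLong(123);                 // a value computed mid-stream
     // ... primary data keeps streaming into `data` here ...
     long blockStartFP = data.FilePointer; // where the deferred block lands
     side.WriteTo(data);                   // append it with a single copy
     return blockStartFP;                  // a real writer records this in metadata
 }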
Example #6
        /// <summary>
        /// expert: writes a value dictionary for a sorted/sortedset field </summary>
        protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // first check if it's a "fixed-length" terms dict
            int minLength = int.MaxValue;
            int maxLength = int.MinValue;

            foreach (BytesRef v in values)
            {
                minLength = Math.Min(minLength, v.Length);
                maxLength = Math.Max(maxLength, v.Length);
            }
            if (minLength == maxLength)
            {
                // no index needed: direct addressing by mult
                AddBinaryField(field, values);
            }
            else
            {
                // header
                Meta.WriteVInt(field.Number);
                Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
                Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
                Meta.WriteLong(-1L);
                // now write the bytes: sharing prefixes within a block
                long startFP = Data.FilePointer;
                // currently, we have to store the delta from expected for every 1/nth term
                // we could avoid this, but it's not much and less overall RAM than the previous approach!
                RAMOutputStream            addressBuffer = new RAMOutputStream();
                MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE);
                BytesRef lastTerm = new BytesRef();
                long     count    = 0;
                foreach (BytesRef v in values)
                {
                    if (count % ADDRESS_INTERVAL == 0)
                    {
                        termAddresses.Add(Data.FilePointer - startFP);
                        // force the first term in a block to be abs-encoded
                        lastTerm.Length = 0;
                    }

                    // prefix-code
                    int sharedPrefix = StringHelper.BytesDifference(lastTerm, v);
                    Data.WriteVInt(sharedPrefix);
                    Data.WriteVInt(v.Length - sharedPrefix);
                    Data.WriteBytes(v.Bytes, v.Offset + sharedPrefix, v.Length - sharedPrefix);
                    lastTerm.CopyBytes(v);
                    count++;
                }
                long indexStartFP = Data.FilePointer;
                // write addresses of indexed terms
                termAddresses.Finish();
                addressBuffer.WriteTo(Data);
                addressBuffer = null;
                termAddresses = null;
                Meta.WriteVInt(minLength);
                Meta.WriteVInt(maxLength);
                Meta.WriteVLong(count);
                Meta.WriteLong(startFP);
                Meta.WriteVInt(ADDRESS_INTERVAL);
                Meta.WriteLong(indexStartFP);
                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Meta.WriteVInt(BLOCK_SIZE);
            }
        }
Example #7
 internal virtual void InitializeInstanceFields()
 {
     output = new RAMOutputStream(buffer);
 }
Example #8
 public Builder()
 {
     output = new RAMOutputStream(buffer);
 }
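Examples #7 and #8 reach the same state two ways: an output field wrapping a buffer the enclosing object already owns, via the RAMOutputStream(RAMFile) overload seen throughout this page. Bytes written through output therefore land in storage that outlives the stream, where a later RAMInputStream can replay them. A sketch, with illustrative names:

 // Illustrative: one RAMFile shared between a writer and a later reader.
 internal static int WriteThenRead()
 {
     var buffer = new RAMFile();
     var output = new RAMOutputStream(buffer); // writes accumulate in `buffer`
     output.WriteVInt(7);
     output.Flush();                           // make the bytes visible in `buffer`
     var reader = new RAMInputStream("", buffer);
     return reader.ReadVInt();                 // reads back 7
 }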
Example #9
            public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
            {
                FieldInfo fieldInfo = state.FieldInfos.FieldInfo(field);

                sumTotalTermFreq = termsIn.SumTotalTermFreq;
                sumDocFreq = termsIn.SumDocFreq;
                docCount = termsIn.DocCount;

                int numTerms = (int) termsIn.Size();
                if (numTerms == -1)
                {
                    throw new System.ArgumentException("codec does not provide Terms.size()");
                }
                terms = new TermAndSkip[numTerms];
                termOffsets = new int[1 + numTerms];

                byte[] termBytes = new byte[1024];

                this.minSkipCount = minSkipCount;

                hasFreq = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_ONLY) > 0;
                hasPos = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS) > 0;
                hasOffsets_Renamed = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
                hasPayloads_Renamed = fieldInfo.HasPayloads();

                BytesRef term;
                DocsEnum docsEnum = null;
                DocsAndPositionsEnum docsAndPositionsEnum = null;
                TermsEnum termsEnum = termsIn.Iterator(null);
                int termOffset = 0;

                IntArrayWriter scratch = new IntArrayWriter();

                // Used for payloads, if any:
                RAMOutputStream ros = new RAMOutputStream();

                // if (DEBUG) {
                //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
                // }

                while ((term = termsEnum.Next()) != null)
                {
                    int docFreq = termsEnum.DocFreq();
                    long totalTermFreq = termsEnum.TotalTermFreq();

                    // if (DEBUG) {
                    //   System.out.println("  term=" + term.utf8ToString());
                    // }

                    termOffsets[count] = termOffset;

                    if (termBytes.Length < (termOffset + term.Length))
                    {
                        termBytes = ArrayUtil.Grow(termBytes, termOffset + term.Length);
                    }
                    Array.Copy(term.Bytes, term.Offset, termBytes, termOffset, term.Length);
                    termOffset += term.Length;
                    termOffsets[count + 1] = termOffset;

                    if (hasPos)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                    }
                    else
                    {
                        docsEnum = termsEnum.Docs(null, docsEnum);
                    }

                    TermAndSkip ent;

                    DocsEnum docsEnum2;
                    docsEnum2 = hasPos ? docsAndPositionsEnum : docsEnum;

                    int docID;

                    if (docFreq <= lowFreqCutoff)
                    {

                        ros.Reset();

                        // Pack postings for low-freq terms into a single int[]:
                        while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            scratch.Add(docID);
                            if (hasFreq)
                            {
                                int freq = docsEnum2.Freq();
                                scratch.Add(freq);
                                if (hasPos)
                                {
                                    for (int pos = 0; pos < freq; pos++)
                                    {
                                        scratch.Add(docsAndPositionsEnum.NextPosition());
                                        if (hasOffsets_Renamed)
                                        {
                                            scratch.Add(docsAndPositionsEnum.StartOffset());
                                            scratch.Add(docsAndPositionsEnum.EndOffset());
                                        }
                                        if (hasPayloads_Renamed)
                                        {
                                            BytesRef payload = docsAndPositionsEnum.Payload;
                                            if (payload != null)
                                            {
                                                scratch.Add(payload.Length);
                                                ros.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                                            }
                                            else
                                            {
                                                scratch.Add(0);
                                            }
                                        }
                                    }
                                }
                            }
                        }


                        byte[] payloads;
                        if (hasPayloads_Renamed)
                        {
                            ros.Flush();
                            payloads = new byte[(int) ros.Length];
                            ros.WriteTo(payloads, 0);
                        }
                        else
                        {
                            payloads = null;
                        }

                        int[] postings = scratch.Get();

                        ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
                    }
                    else
                    {
                        var docs = new int[docFreq];
                        int[] freqs;
                        int[][] positions;
                        byte[][][] payloads;

                        if (hasFreq)
                        {
                            freqs = new int[docFreq];
                            if (hasPos)
                            {
                                positions = new int[docFreq][];
                                if (hasPayloads_Renamed)
                                {
                                    payloads = new byte[docFreq][][];
                                }
                                else
                                {
                                    payloads = null;
                                }
                            }
                            else
                            {
                                positions = null;
                                payloads = null;
                            }
                        }
                        else
                        {
                            freqs = null;
                            positions = null;
                            payloads = null;
                        }

                        // Use separate int[] for the postings for high-freq
                        // terms:
                        int upto = 0;
                        while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            docs[upto] = docID;
                            if (hasFreq)
                            {
                                int freq = docsEnum2.Freq();
                                freqs[upto] = freq;
                                if (hasPos)
                                {
                                    int mult;
                                    if (hasOffsets_Renamed)
                                    {
                                        mult = 3;
                                    }
                                    else
                                    {
                                        mult = 1;
                                    }
                                    if (hasPayloads_Renamed)
                                    {
                                        payloads[upto] = new byte[freq][];
                                    }
                                    positions[upto] = new int[mult*freq];
                                    int posUpto = 0;
                                    for (int pos = 0; pos < freq; pos++)
                                    {
                                        positions[upto][posUpto] = docsAndPositionsEnum.NextPosition();
                                        if (hasPayloads_Renamed)
                                        {
                                            BytesRef payload = docsAndPositionsEnum.Payload;
                                            if (payload != null)
                                            {
                                                var payloadBytes = new byte[payload.Length];
                                                Array.Copy(payload.Bytes, payload.Offset, payloadBytes, 0,
                                                    payload.Length);
                                                payloads[upto][pos] = payloadBytes;
                                            }
                                        }
                                        posUpto++;
                                        if (hasOffsets_Renamed)
                                        {
                                            positions[upto][posUpto++] = docsAndPositionsEnum.StartOffset();
                                            positions[upto][posUpto++] = docsAndPositionsEnum.EndOffset();
                                        }
                                    }
                                }
                            }

                            upto++;
                        }
                        Debug.Assert(upto == docFreq);
                        ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
                    }

                    terms[count] = ent;
                    SetSkips(count, termBytes);
                    count++;
                }

                // End sentinel:
                termOffsets[count] = termOffset;

                FinishSkips();

                //System.out.println(skipCount + " skips: " + field);

                this.termBytes = new byte[termOffset];
                Array.Copy(termBytes, 0, this.termBytes, 0, termOffset);

                // Pack skips:
                this.skips = new int[skipCount];
                this.skipOffsets = new int[1 + numTerms];

                int skipOffset = 0;
                for (int i = 0; i < numTerms; i++)
                {
                    int[] termSkips = terms[i].skips;
                    skipOffsets[i] = skipOffset;
                    if (termSkips != null)
                    {
                        Array.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
                        skipOffset += termSkips.Length;
                        terms[i].skips = null;
                    }
                }
                this.skipOffsets[numTerms] = skipOffset;
                Debug.Assert(skipOffset == skipCount);
            }
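Example #9 uses its single RAMOutputStream (ros) as a recyclable scratch buffer: Reset() before each low-frequency term, WriteBytes for every payload, then Flush(), Length, and WriteTo(byte[], 0) to extract the packed bytes into a flat array. That extract-to-array idiom in isolation (method and parameter names are illustrative):

 // Illustrative: pack a sequence of BytesRef payloads into one byte[].
 internal static byte[] PackPayloads(IEnumerable<BytesRef> payloads)
 {
     var ros = new RAMOutputStream();
     ros.Reset(); // recycle the buffer if it was used before
     foreach (BytesRef p in payloads)
     {
         ros.WriteBytes(p.Bytes, p.Offset, p.Length);
     }
     ros.Flush();
     var packed = new byte[(int)ros.Length];
     ros.WriteTo(packed, 0); // copy the buffered bytes into the flat array
     return packed;
 }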
Example #10
 // Writes the contents of buffer into the fields stream
 // and adds a new entry for this document into the index
 // stream.  This assumes the buffer was already written
 // in the correct fields format.
 internal void FlushDocument(int numStoredFields, RAMOutputStream buffer)
 {
     indexStream.WriteLong(fieldsStream.FilePointer);
     fieldsStream.WriteVInt(numStoredFields);
     buffer.WriteTo(fieldsStream);
 }
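Example #10 is the two-stream stored-fields layout: before the buffered document bytes are appended, indexStream records the current fieldsStream.FilePointer, so each document can later be found by seeking the fixed-width index entry. The buffer parameter lets a caller assemble an entire document in memory and commit it in one WriteTo. A hypothetical caller; the writer instance and the helper that serializes field bytes are assumptions, not from the source:

 // Hypothetical caller: the buffer holds one document, already serialized
 // in the fields format, and is committed then recycled.
 var docBuffer = new RAMOutputStream();
 WriteOneFieldInFieldsFormat(docBuffer);   // illustrative helper, not shown
 fieldsWriter.FlushDocument(1, docBuffer); // 1 stored field for this doc
 docBuffer.Reset();                        // reuse for the next document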
Example #11
 private void InitBlock(TermVectorsTermsWriter enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
     buffer    = enclosingInstance.docWriter.NewPerDocBuffer();
     perDocTvf = new RAMOutputStream(buffer);
 }
Example #12
 private void InitBlock(StoredFieldsWriter enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
     buffer = enclosingInstance.docWriter.NewPerDocBuffer();
     fdt    = new RAMOutputStream(buffer);
 }
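Examples #11 and #12 are the same wiring in two consumers: each per-document writer asks the shared docWriter for a recyclable per-document buffer (NewPerDocBuffer()) and layers a RAMOutputStream over it, so term vectors (perDocTvf) and stored fields (fdt) accumulate in memory until the document is flushed, which is the kind of buffer that Example #10's FlushDocument then copies out in one WriteTo.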