internal override void NewTerm(int termID)
        {
            // Invoked the first time termID is encountered since the last
            // flush; initializes its per-term state and buffers the first posting.
            Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));

            var freqProxArray = (FreqProxPostingsArray)termsHashPerField.postingsArray;

            freqProxArray.lastDocIDs[termID] = docState.docID;
            if (hasFreq)
            {
                // Shift leaves the low bit free to flag "freq == 1" when the
                // doc code is eventually written out.
                freqProxArray.lastDocCodes[termID] = docState.docID << 1;
                freqProxArray.termFreqs[termID] = 1;
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position);
                    if (hasOffsets)
                    {
                        WriteOffsets(termID, fieldState.Offset);
                    }
                }
                else
                {
                    Debug.Assert(!hasOffsets);
                }
            }
            else
            {
                // Frequencies are omitted: the raw docID doubles as the doc code.
                freqProxArray.lastDocCodes[termID] = docState.docID;
            }
            fieldState.MaxTermFrequency = Math.Max(1, fieldState.MaxTermFrequency);
            fieldState.UniqueTermCount++;
        }
// ==== Example 2 ====
            internal override void CopyTo(ParallelPostingsArray toArray, int numToCopy)
            {
                // Copies the first numToCopy per-term entries into toArray,
                // which must itself be a FreqProxPostingsArray.
                Debug.Assert(toArray is FreqProxPostingsArray);
                FreqProxPostingsArray target = (FreqProxPostingsArray)toArray;

                // Let the base class move its own parallel arrays first.
                base.CopyTo(toArray, numToCopy);

                Array.Copy(LastDocIDs, 0, target.LastDocIDs, 0, numToCopy);
                Array.Copy(LastDocCodes, 0, target.LastDocCodes, 0, numToCopy);

                // The optional arrays exist only when the field records the
                // corresponding data; the destination must mirror that.
                if (LastPositions != null)
                {
                    Debug.Assert(target.LastPositions != null);
                    Array.Copy(LastPositions, 0, target.LastPositions, 0, numToCopy);
                }
                if (LastOffsets != null)
                {
                    Debug.Assert(target.LastOffsets != null);
                    Array.Copy(LastOffsets, 0, target.LastOffsets, 0, numToCopy);
                }
                if (TermFreqs != null)
                {
                    Debug.Assert(target.TermFreqs != null);
                    Array.Copy(TermFreqs, 0, target.TermFreqs, 0, numToCopy);
                }
            }
// ==== Example 3 ====
        internal override void NewTerm(int termID)
        {
            // Invoked the first time termID is encountered since the last
            // flush; initializes its per-term state and buffers the first posting.
            Debug.Assert(DocState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));

            var freqProxArray = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;

            freqProxArray.LastDocIDs[termID] = DocState.DocID;
            if (HasFreq)
            {
                // Shift leaves the low bit free to flag "freq == 1" when the
                // doc code is eventually written out.
                freqProxArray.LastDocCodes[termID] = DocState.DocID << 1;
                freqProxArray.TermFreqs[termID] = 1;
                if (HasProx)
                {
                    WriteProx(termID, FieldState.Position_Renamed);
                    if (HasOffsets)
                    {
                        WriteOffsets(termID, FieldState.Offset_Renamed);
                    }
                }
                else
                {
                    Debug.Assert(!HasOffsets);
                }
            }
            else
            {
                // Frequencies are omitted: the raw docID doubles as the doc code.
                freqProxArray.LastDocCodes[termID] = DocState.DocID;
            }
            FieldState.MaxTermFrequency_Renamed = Math.Max(1, FieldState.MaxTermFrequency_Renamed);
            FieldState.UniqueTermCount_Renamed++;
        }
// ==== Example 4 ====
        internal void WriteProx(int termID, int proxCode)
        {
            // Appends one delta-coded position (plus optional payload) to byte
            // stream 1, then records the term's absolute last position.
            Debug.Assert(HasProx);

            BytesRef payload = PayloadAttribute == null ? null : PayloadAttribute.Payload;

            if (payload == null || payload.Length <= 0)
            {
                // Low bit clear: no payload follows.
                TermsHashPerField.WriteVInt(1, proxCode << 1);
            }
            else
            {
                // Low bit set: payload length and bytes follow.
                TermsHashPerField.WriteVInt(1, (proxCode << 1) | 1);
                TermsHashPerField.WriteVInt(1, payload.Length);
                TermsHashPerField.WriteBytes(1, payload.Bytes, payload.Offset, payload.Length);
                HasPayloads = true;
            }

            var freqProxArray = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;
            freqProxArray.LastPositions[termID] = FieldState.Position_Renamed;
        }
// ==== Example 5 ====
        internal void WriteProx(int termID, int proxCode)
        {
            // Appends one delta-coded position (plus optional payload) to byte
            // stream 1, then records the term's absolute last position.
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(hasProx);
            }

            BytesRef payload = payloadAttribute == null ? null : payloadAttribute.Payload;

            if (payload == null || payload.Length <= 0)
            {
                // Low bit clear: no payload follows.
                termsHashPerField.WriteVInt32(1, proxCode << 1);
            }
            else
            {
                // Low bit set: payload length and bytes follow.
                termsHashPerField.WriteVInt32(1, (proxCode << 1) | 1);
                termsHashPerField.WriteVInt32(1, payload.Length);
                termsHashPerField.WriteBytes(1, payload.Bytes, payload.Offset, payload.Length);
                hasPayloads = true;
            }

            var freqProxArray = (FreqProxPostingsArray)termsHashPerField.postingsArray;
            freqProxArray.lastPositions[termID] = fieldState.Position;
        }
// ==== Example 6 ====
        internal void WriteOffsets(int termID, int offsetAccum)
        {
            // Appends a delta-coded (startOffset, length) pair to byte stream 1
            // and records the term's last absolute start offset.
            Debug.Assert(HasOffsets);

            int start = offsetAccum + OffsetAttribute.StartOffset();
            int end = offsetAccum + OffsetAttribute.EndOffset();
            var freqProxArray = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;

            int startDelta = start - freqProxArray.LastOffsets[termID];
            // Start offsets must be non-decreasing within a document.
            Debug.Assert(startDelta >= 0);
            TermsHashPerField.WriteVInt(1, startDelta);
            TermsHashPerField.WriteVInt(1, end - start);

            freqProxArray.LastOffsets[termID] = start;
        }
// ==== Example 7 ====
        internal void WriteOffsets(int termID, int offsetAccum)
        {
            // Appends a delta-coded (startOffset, length) pair to byte stream 1
            // and records the term's last absolute start offset.
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(hasOffsets);
            }

            int start = offsetAccum + offsetAttribute.StartOffset;
            int end = offsetAccum + offsetAttribute.EndOffset;
            var freqProxArray = (FreqProxPostingsArray)termsHashPerField.postingsArray;

            int startDelta = start - freqProxArray.lastOffsets[termID];
            if (Debugging.AssertsEnabled)
            {
                // Start offsets must be non-decreasing within a document.
                Debugging.Assert(startDelta >= 0);
            }
            termsHashPerField.WriteVInt32(1, startDelta);
            termsHashPerField.WriteVInt32(1, end - start);

            freqProxArray.lastOffsets[termID] = start;
        }
        internal override void NewTerm(int termID)
        {
            // First time we're seeing this term since the last
            // flush
            // LUCENENET: .NET doesn't support asserts in release mode
            if (Lucene.Net.Diagnostics.Debugging.AssertsEnabled)
            {
                docState.TestPoint("FreqProxTermsWriterPerField.newTerm start");
            }

            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;

            postings.lastDocIDs[termID] = docState.docID;
            if (!hasFreq)
            {
                // Frequencies are omitted: the raw docID doubles as the doc code.
                postings.lastDocCodes[termID] = docState.docID;
            }
            else
            {
                // Shift leaves the low bit free to flag "freq == 1" when the
                // doc code is eventually written out.
                postings.lastDocCodes[termID] = docState.docID << 1;
                postings.termFreqs[termID]    = 1;
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position);
                    if (hasOffsets)
                    {
                        WriteOffsets(termID, fieldState.Offset);
                    }
                }
                else
                {
                    // FIX (consistency): this method is written against the
                    // runtime-gated Debugging helper (see top of method);
                    // Debug.Assert is compiled out of release builds and mixed
                    // two assertion frameworks in the same method.
                    if (Lucene.Net.Diagnostics.Debugging.AssertsEnabled)
                    {
                        Lucene.Net.Diagnostics.Debugging.Assert(!hasOffsets);
                    }
                }
            }
            fieldState.MaxTermFrequency = Math.Max(1, fieldState.MaxTermFrequency);
            fieldState.UniqueTermCount++;
        }
// ==== Example 9 ====
            internal override void CopyTo(ParallelPostingsArray toArray, int numToCopy)
            {
                // Copies the first numToCopy per-term entries into toArray,
                // which must itself be a FreqProxPostingsArray.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(toArray is FreqProxPostingsArray);
                }
                FreqProxPostingsArray target = (FreqProxPostingsArray)toArray;

                // Let the base class move its own parallel arrays first.
                base.CopyTo(toArray, numToCopy);

                Array.Copy(lastDocIDs, 0, target.lastDocIDs, 0, numToCopy);
                Array.Copy(lastDocCodes, 0, target.lastDocCodes, 0, numToCopy);

                // The optional arrays are only allocated when the field indexes
                // the corresponding data; the destination must mirror that.
                if (lastPositions != null)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(target.lastPositions != null);
                    }
                    Array.Copy(lastPositions, 0, target.lastPositions, 0, numToCopy);
                }
                if (lastOffsets != null)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(target.lastOffsets != null);
                    }
                    Array.Copy(lastOffsets, 0, target.lastOffsets, 0, numToCopy);
                }
                if (termFreqs != null)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(target.termFreqs != null);
                    }
                    Array.Copy(termFreqs, 0, target.termFreqs, 0, numToCopy);
                }
            }
// ==== Example 10 ====
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * into a single RAM segment. */

        // fieldName: name of the field being flushed (used to build delete Terms).
        // consumer:  codec-level sink receiving terms, docs and positions.
        // state:     per-segment write state (doc count, buffered deletes, live docs).
        internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state)
        {
            if (!fieldInfo.Indexed)
            {
                return; // nothing to flush, don't bother the codec with the unindexed field
            }

            TermsConsumer        termsConsumer = consumer.AddField(fieldInfo);
            IComparer <BytesRef> termComp      = termsConsumer.Comparator;

            // CONFUSING: this.indexOptions holds the index options
            // that were current when we first saw this field.  But
            // it's possible this has changed, eg when other
            // documents are indexed that cause a "downgrade" of the
            // IndexOptions.  So we must decode the in-RAM buffer
            // according to this.indexOptions, but then write the
            // new segment to the directory according to
            // currentFieldIndexOptions:
            FieldInfo.IndexOptions?currentFieldIndexOptions = fieldInfo.FieldIndexOptions;
            Debug.Assert(currentFieldIndexOptions != null);

            // What the codec should receive (possibly a downgrade)...
            bool writeTermFreq  = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
            bool writePositions = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            bool writeOffsets   = currentFieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

            // ...versus what the in-RAM byte streams actually contain.
            bool readTermFreq  = this.HasFreq;
            bool readPositions = this.HasProx;
            bool readOffsets   = this.HasOffsets;

            //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);

            // Make sure FieldInfo.update is working correctly!:
            Debug.Assert(!writeTermFreq || readTermFreq);
            Debug.Assert(!writePositions || readPositions);
            Debug.Assert(!writeOffsets || readOffsets);

            Debug.Assert(!writeOffsets || writePositions);

            // Per-term deletes buffered against this segment (term -> docIDUpto),
            // or null when none apply.
            IDictionary <Term, int?> segDeletes;

            if (state.SegUpdates != null && state.SegUpdates.Terms.Count > 0)
            {
                segDeletes = state.SegUpdates.Terms;
            }
            else
            {
                segDeletes = null;
            }

            int[]    termIDs  = TermsHashPerField.SortPostings(termComp);
            int      numTerms = TermsHashPerField.BytesHash.Size();
            BytesRef text     = new BytesRef();
            FreqProxPostingsArray postings = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;
            ByteSliceReader       freq     = new ByteSliceReader();
            ByteSliceReader       prox     = new ByteSliceReader();

            FixedBitSet visitedDocs      = new FixedBitSet(state.SegmentInfo.DocCount);
            long        sumTotalTermFreq = 0;
            long        sumDocFreq       = 0;

            Term protoTerm = new Term(fieldName);

            // Terms are visited in termComp order; each iteration serializes one
            // term's full doc/position stream to the codec.
            for (int i = 0; i < numTerms; i++)
            {
                int termID = termIDs[i];
                // Get BytesRef
                int textStart = postings.TextStarts[termID];
                TermsHashPerField.BytePool.SetBytesRef(text, textStart);

                // Stream 0 holds doc deltas (+freqs); stream 1 holds positions/offsets.
                TermsHashPerField.InitReader(freq, termID, 0);
                if (readPositions || readOffsets)
                {
                    TermsHashPerField.InitReader(prox, termID, 1);
                }

                // TODO: really TermsHashPerField should take over most
                // of this loop, including merge sort of terms from
                // multiple threads and interacting with the
                // TermsConsumer, only calling out to us (passing us the
                // DocsConsumer) to handle delivery of docs/positions

                PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text);

                // Docs below this limit are marked deleted (delete-by-term upto).
                int?delDocLimit;
                if (segDeletes != null)
                {
                    protoTerm.Bytes_Renamed = text;
                    int?docIDUpto;
                    segDeletes.TryGetValue(protoTerm, out docIDUpto);
                    if (docIDUpto != null)
                    {
                        delDocLimit = docIDUpto;
                    }
                    else
                    {
                        delDocLimit = 0;
                    }
                }
                else
                {
                    delDocLimit = 0;
                }

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                int  docFreq       = 0;
                long totalTermFreq = 0;
                int  docID         = 0;

                // Drain this term's docs: first from the freq byte stream, then
                // the final doc whose code/freq were never flushed to the stream
                // and still live in the postings arrays.
                while (true)
                {
                    //System.out.println("  cycle");
                    int termFreq;
                    if (freq.Eof())
                    {
                        if (postings.LastDocCodes[termID] != -1)
                        {
                            // Return last doc
                            docID = postings.LastDocIDs[termID];
                            if (readTermFreq)
                            {
                                termFreq = postings.TermFreqs[termID];
                            }
                            else
                            {
                                termFreq = -1;
                            }
                            // -1 marks the final doc as consumed so the next
                            // iteration breaks out of the loop.
                            postings.LastDocCodes[termID] = -1;
                        }
                        else
                        {
                            // EOF
                            break;
                        }
                    }
                    else
                    {
                        int code = freq.ReadVInt();
                        if (!readTermFreq)
                        {
                            docID   += code;
                            termFreq = -1;
                        }
                        else
                        {
                            // Doc delta is in the high bits; low bit set means
                            // termFreq == 1, otherwise the freq VInt follows.
                            docID += (int)((uint)code >> 1);
                            if ((code & 1) != 0)
                            {
                                termFreq = 1;
                            }
                            else
                            {
                                termFreq = freq.ReadVInt();
                            }
                        }

                        Debug.Assert(docID != postings.LastDocIDs[termID]);
                    }

                    docFreq++;
                    Debug.Assert(docID < state.SegmentInfo.DocCount, "doc=" + docID + " maxDoc=" + state.SegmentInfo.DocCount);

                    // NOTE: we could check here if the docID was
                    // deleted, and skip it.  However, this is somewhat
                    // dangerous because it can yield non-deterministic
                    // behavior since we may see the docID before we see
                    // the term that caused it to be deleted.  this
                    // would mean some (but not all) of its postings may
                    // make it into the index, which'd alter the docFreq
                    // for those terms.  We could fix this by doing two
                    // passes, ie first sweep marks all del docs, and
                    // 2nd sweep does the real flush, but I suspect
                    // that'd add too much time to flush.
                    visitedDocs.Set(docID);
                    postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1);
                    if (docID < delDocLimit)
                    {
                        // Mark it deleted.  TODO: we could also skip
                        // writing its postings; this would be
                        // deterministic (just for this Term's docs).

                        // TODO: can we do this reach-around in a cleaner way????
                        if (state.LiveDocs == null)
                        {
                            state.LiveDocs = DocState.DocWriter.Codec.LiveDocsFormat().NewLiveDocs(state.SegmentInfo.DocCount);
                        }
                        if (state.LiveDocs.Get(docID))
                        {
                            state.DelCountOnFlush++;
                            state.LiveDocs.Clear(docID);
                        }
                    }

                    // NOTE: termFreq is -1 when freqs aren't tracked; the sum is
                    // only reported to the codec when writeTermFreq is true.
                    totalTermFreq += termFreq;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.

                    if (readPositions || readOffsets)
                    {
                        // we did record positions (& maybe payload) and/or offsets
                        // NOTE(review): presumably readPositions implies
                        // readTermFreq here (so termFreq >= 0) — confirm upstream.
                        int position = 0;
                        int offset   = 0;
                        for (int j = 0; j < termFreq; j++)
                        {
                            BytesRef thisPayload;

                            if (readPositions)
                            {
                                // Position delta in the high bits; low bit set
                                // means a payload follows.
                                int code = prox.ReadVInt();
                                position += (int)((uint)code >> 1);

                                if ((code & 1) != 0)
                                {
                                    // this position has a payload
                                    int payloadLength = prox.ReadVInt();

                                    // Payload is a reusable scratch BytesRef on this instance.
                                    if (Payload == null)
                                    {
                                        Payload       = new BytesRef();
                                        Payload.Bytes = new sbyte[payloadLength];
                                    }
                                    else if (Payload.Bytes.Length < payloadLength)
                                    {
                                        Payload.Grow(payloadLength);
                                    }

                                    prox.ReadBytes(Payload.Bytes, 0, payloadLength);
                                    Payload.Length = payloadLength;
                                    thisPayload    = Payload;
                                }
                                else
                                {
                                    thisPayload = null;
                                }

                                if (readOffsets)
                                {
                                    // Offsets are delta-coded: start vs previous
                                    // start, then length.
                                    int startOffset = offset + prox.ReadVInt();
                                    int endOffset   = startOffset + prox.ReadVInt();
                                    if (writePositions)
                                    {
                                        if (writeOffsets)
                                        {
                                            Debug.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset);
                                            postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset);
                                        }
                                        else
                                        {
                                            postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                        }
                                    }
                                    offset = startOffset;
                                }
                                else if (writePositions)
                                {
                                    postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                }
                            }
                        }
                    }
                    postingsConsumer.FinishDoc();
                }
                termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
                sumTotalTermFreq += totalTermFreq;
                sumDocFreq       += docFreq;
            }

            termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality());
        }
// ==== Example 11 ====
        internal override void AddTerm(int termID)
        {
            // Buffers another occurrence of a term that has already been seen
            // since the last flush (either in this document or an earlier one).
            Debug.Assert(DocState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

            FreqProxPostingsArray postings = (FreqProxPostingsArray)TermsHashPerField.PostingsArray;

            Debug.Assert(!HasFreq || postings.TermFreqs[termID] > 0);

            if (!HasFreq)
            {
                // docID-only indexing: emit the previous doc's code the first
                // time the term shows up in a new document.
                Debug.Assert(postings.TermFreqs == null);
                if (DocState.DocID != postings.LastDocIDs[termID])
                {
                    Debug.Assert(DocState.DocID > postings.LastDocIDs[termID]);
                    TermsHashPerField.WriteVInt(0, postings.LastDocCodes[termID]);
                    // Doc codes are delta-encoded against the previous docID.
                    postings.LastDocCodes[termID] = DocState.DocID - postings.LastDocIDs[termID];
                    postings.LastDocIDs[termID]   = DocState.DocID;
                    FieldState.UniqueTermCount_Renamed++;
                }
            }
            else if (DocState.DocID != postings.LastDocIDs[termID])
            {
                Debug.Assert(DocState.DocID > postings.LastDocIDs[termID], "id: " + DocState.DocID + " postings ID: " + postings.LastDocIDs[termID] + " termID: " + termID);
                // Term not yet seen in the current doc but previously
                // seen in other doc(s) since the last flush

                // Now that we know doc freq for previous doc,
                // write it & lastDocCode
                if (1 == postings.TermFreqs[termID])
                {
                    // freq == 1 is folded into the doc code's low bit.
                    TermsHashPerField.WriteVInt(0, postings.LastDocCodes[termID] | 1);
                }
                else
                {
                    // Otherwise the freq follows the doc code as its own VInt.
                    TermsHashPerField.WriteVInt(0, postings.LastDocCodes[termID]);
                    TermsHashPerField.WriteVInt(0, postings.TermFreqs[termID]);
                }
                // Reset per-doc state for the current document.
                postings.TermFreqs[termID]          = 1;
                FieldState.MaxTermFrequency_Renamed = Math.Max(1, FieldState.MaxTermFrequency_Renamed);
                postings.LastDocCodes[termID]       = (DocState.DocID - postings.LastDocIDs[termID]) << 1;
                postings.LastDocIDs[termID]         = DocState.DocID;
                if (HasProx)
                {
                    WriteProx(termID, FieldState.Position_Renamed);
                    if (HasOffsets)
                    {
                        // Offsets restart from 0 in each new document.
                        postings.LastOffsets[termID] = 0;
                        WriteOffsets(termID, FieldState.Offset_Renamed);
                    }
                }
                else
                {
                    Debug.Assert(!HasOffsets);
                }
                FieldState.UniqueTermCount_Renamed++;
            }
            else
            {
                // Repeat occurrence within the same document: bump the freq and
                // append the delta-coded position/offset data.
                FieldState.MaxTermFrequency_Renamed = Math.Max(FieldState.MaxTermFrequency_Renamed, ++postings.TermFreqs[termID]);
                if (HasProx)
                {
                    WriteProx(termID, FieldState.Position_Renamed - postings.LastPositions[termID]);
                }
                if (HasOffsets)
                {
                    WriteOffsets(termID, FieldState.Offset_Renamed);
                }
            }
        }
        internal override void AddTerm(int termID)
        {
            // Buffers another occurrence of a term that has already been seen
            // since the last flush (either in this document or an earlier one).
            Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;

            Debug.Assert(!hasFreq || postings.termFreqs[termID] > 0);

            if (!hasFreq)
            {
                // docID-only indexing: emit the previous doc's code the first
                // time the term shows up in a new document.
                Debug.Assert(postings.termFreqs == null);
                if (docState.docID != postings.lastDocIDs[termID])
                {
                    Debug.Assert(docState.docID > postings.lastDocIDs[termID]);
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]);
                    // Doc codes are delta-encoded against the previous docID.
                    postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
                    postings.lastDocIDs[termID]   = docState.docID;
                    fieldState.UniqueTermCount++;
                }
            }
            else if (docState.docID != postings.lastDocIDs[termID])
            {
                Debug.Assert(docState.docID > postings.lastDocIDs[termID], "id: " + docState.docID + " postings ID: " + postings.lastDocIDs[termID] + " termID: " + termID);
                // Term not yet seen in the current doc but previously
                // seen in other doc(s) since the last flush

                // Now that we know doc freq for previous doc,
                // write it & lastDocCode
                if (1 == postings.termFreqs[termID])
                {
                    // freq == 1 is folded into the doc code's low bit.
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID] | 1);
                }
                else
                {
                    // Otherwise the freq follows the doc code as its own VInt.
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]);
                    termsHashPerField.WriteVInt32(0, postings.termFreqs[termID]);
                }
                // Reset per-doc state for the current document.
                postings.termFreqs[termID]    = 1;
                fieldState.MaxTermFrequency   = Math.Max(1, fieldState.MaxTermFrequency);
                postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
                postings.lastDocIDs[termID]   = docState.docID;
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position);
                    if (hasOffsets)
                    {
                        // Offsets restart from 0 in each new document.
                        postings.lastOffsets[termID] = 0;
                        WriteOffsets(termID, fieldState.Offset);
                    }
                }
                else
                {
                    Debug.Assert(!hasOffsets);
                }
                fieldState.UniqueTermCount++;
            }
            else
            {
                // Repeat occurrence within the same document: bump the freq and
                // append the delta-coded position/offset data.
                fieldState.MaxTermFrequency = Math.Max(fieldState.MaxTermFrequency, ++postings.termFreqs[termID]);
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position - postings.lastPositions[termID]);
                }
                if (hasOffsets)
                {
                    WriteOffsets(termID, fieldState.Offset);
                }
            }
        }