// DocValues updates
        private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
                // that these documents aren't even returned.

                string    currentField = null;
                TermsEnum termsEnum    = null;
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.term;
                    int  limit = update.docIDUpto;
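                    // docIDUpto caps this update to documents that already
                    // existed when the update arrived: only docs with
                    // docID < limit are eligible to receive the new value.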

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms' lexical order.
                    // We can apply the updates in term order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get the same docIDUpto, yet still need to respect the order in
                    // which those updates arrived.

                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.CompareToOrdinal(term.Field) < 0;
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.field, update.type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.field, update.type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.value);
                        }
                    }
                }
            }
        }
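This method is internal plumbing; applications never call it directly. Below is a minimal sketch of the public entry point that produces the DocValuesUpdate objects it consumes. The index path, field names, and analyzer choice are hypothetical, and the target field must already exist in the index as a numeric DocValues field:

using Lucene.Net.Analysis.Core;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Hypothetical usage: change a numeric DocValues field for all documents
// matching the "id" term, without re-indexing them. When the writer next
// applies buffered updates, code like ApplyDocValuesUpdates above resolves
// the term to docIDs and records the new value per document.
using (Directory dir = FSDirectory.Open("/path/to/index"))
using (var writer = new IndexWriter(dir, new IndexWriterConfig(
    LuceneVersion.LUCENE_48, new WhitespaceAnalyzer(LuceneVersion.LUCENE_48))))
{
    writer.UpdateNumericDocValue(new Term("id", "doc-7"), "popularity", 42L);
    writer.Commit();
}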
Example #2
/// <summary>
/// Returns the number of documents containing the
/// <paramref name="term"/>.  This method returns 0 if the term or
/// field does not exist.  This method does not take into
/// account deleted documents that have not yet been merged
/// away. </summary>
/// <seealso cref="TermsEnum.DocFreq"/>
public abstract int DocFreq(Term term);
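A hedged usage sketch: DirectoryReader is one concrete IndexReader that exposes this member, and the index path, field, and term below are hypothetical:

using Lucene.Net.Index;
using Lucene.Net.Store;

using (Directory dir = FSDirectory.Open("/path/to/index"))
using (DirectoryReader reader = DirectoryReader.Open(dir))
{
    // 0 if the field or term does not exist; deleted-but-unmerged
    // documents still count, as the summary above notes.
    int df = reader.DocFreq(new Term("body", "lucene"));
}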
Example #3
/// <summary>
/// Returns the total number of occurrences of <paramref name="term"/> across all
/// documents (the sum of the Freq for each doc that has this term). This
/// will be -1 if the codec doesn't support this measure. Note that, like other
/// term measures, this measure does not take deleted documents into account.
/// </summary>
public abstract long TotalTermFreq(Term term);
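A matching sketch for TotalTermFreq under the same hypothetical setup:

using Lucene.Net.Index;
using Lucene.Net.Store;

using (Directory dir = FSDirectory.Open("/path/to/index"))
using (DirectoryReader reader = DirectoryReader.Open(dir))
{
    // -1 when the codec does not record this measure; otherwise it is at
    // least DocFreq for the same term, since every counted document
    // contains the term at least once.
    long ttf = reader.TotalTermFreq(new Term("body", "lucene"));
}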
Example #4
        internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state)
        {
            if (!fieldInfo.IsIndexed)
            {
                return; // nothing to flush, don't bother the codec with the unindexed field
            }

            TermsConsumer       termsConsumer = consumer.AddField(fieldInfo);
            IComparer<BytesRef> termComp      = termsConsumer.Comparer;

            // CONFUSING: this.indexOptions holds the index options
            // that were current when we first saw this field.  But
            // it's possible this has changed, eg when other
            // documents are indexed that cause a "downgrade" of the
            // IndexOptions.  So we must decode the in-RAM buffer
            // according to this.indexOptions, but then write the
            // new segment to the directory according to
            // currentFieldIndexOptions:
            IndexOptions currentFieldIndexOptions = fieldInfo.IndexOptions;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(currentFieldIndexOptions != IndexOptions.NONE);
            }

            // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
            bool writeTermFreq  = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS) >= 0;
            bool writePositions = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            bool writeOffsets   = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

            bool readTermFreq  = this.hasFreq;
            bool readPositions = this.hasProx;
            bool readOffsets   = this.hasOffsets;

            //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);

            // Make sure FieldInfo.update is working correctly!:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(!writeTermFreq || readTermFreq);
                Debugging.Assert(!writePositions || readPositions);
                Debugging.Assert(!writeOffsets || readOffsets);

                Debugging.Assert(!writeOffsets || writePositions);
            }

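            // Term-keyed deletes buffered against this flushing segment: each
            // entry maps a deleted term to the docID limit (exclusive) below
            // which matching documents must be marked deleted at flush time.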
            IDictionary<Term, int?> segDeletes;

            if (state.SegUpdates != null && state.SegUpdates.terms.Count > 0)
            {
                segDeletes = state.SegUpdates.terms;
            }
            else
            {
                segDeletes = null;
            }

            int[]    termIDs  = termsHashPerField.SortPostings(termComp);
            int      numTerms = termsHashPerField.bytesHash.Count;
            BytesRef text     = new BytesRef();
            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;
            ByteSliceReader       freq     = new ByteSliceReader();
            ByteSliceReader       prox     = new ByteSliceReader();

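            // Records every docID that contains at least one term of this
            // field; its cardinality becomes the field's docCount passed to
            // TermsConsumer.Finish at the end of this method.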
            FixedBitSet visitedDocs      = new FixedBitSet(state.SegmentInfo.DocCount);
            long        sumTotalTermFreq = 0;
            long        sumDocFreq       = 0;

            Term protoTerm = new Term(fieldName);

            for (int i = 0; i < numTerms; i++)
            {
                int termID = termIDs[i];
                // Get BytesRef
                int textStart = postings.textStarts[termID];
                termsHashPerField.bytePool.SetBytesRef(text, textStart);

                termsHashPerField.InitReader(freq, termID, 0);
                if (readPositions || readOffsets)
                {
                    termsHashPerField.InitReader(prox, termID, 1);
                }

                // TODO: really TermsHashPerField should take over most
                // of this loop, including merge sort of terms from
                // multiple threads and interacting with the
                // TermsConsumer, only calling out to us (passing us the
                // DocsConsumer) to handle delivery of docs/positions

                PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text);

                int? delDocLimit;
                if (segDeletes != null)
                {
                    protoTerm.Bytes = text;
                    if (segDeletes.TryGetValue(protoTerm, out int? docIDUpto) && docIDUpto != null)
                    {
                        delDocLimit = docIDUpto;
                    }
                    else
                    {
                        delDocLimit = 0;
                    }
                }
                else
                {
                    delDocLimit = 0;
                }

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                int  docFreq       = 0;
                long totalTermFreq = 0;
                int  docID         = 0;

                while (true)
                {
                    //System.out.println("  cycle");
                    int termFreq;
                    if (freq.Eof())
                    {
                        if (postings.lastDocCodes[termID] != -1)
                        {
                            // Return last doc
                            docID = postings.lastDocIDs[termID];
                            if (readTermFreq)
                            {
                                termFreq = postings.termFreqs[termID];
                            }
                            else
                            {
                                termFreq = -1;
                            }
                            postings.lastDocCodes[termID] = -1;
                        }
                        else
                        {
                            // EOF
                            break;
                        }
                    }
                    else
                    {
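                        // The in-RAM postings pack each doc as a VInt code:
                        // with freqs, the low bit flags termFreq == 1 and the
                        // upper bits hold the docID delta; without freqs, the
                        // code is the raw docID delta.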
                        int code = freq.ReadVInt32();
                        if (!readTermFreq)
                        {
                            docID   += code;
                            termFreq = -1;
                        }
                        else
                        {
                            docID += (int)((uint)code >> 1);
                            if ((code & 1) != 0)
                            {
                                termFreq = 1;
                            }
                            else
                            {
                                termFreq = freq.ReadVInt32();
                            }
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docID != postings.lastDocIDs[termID]);
                        }
                    }

                    docFreq++;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docID < state.SegmentInfo.DocCount, "doc={0} maxDoc={1}", docID, state.SegmentInfo.DocCount);
                    }

                    // NOTE: we could check here if the docID was
                    // deleted, and skip it.  However, this is somewhat
                    // dangerous because it can yield non-deterministic
                    // behavior since we may see the docID before we see
                    // the term that caused it to be deleted.  this
                    // would mean some (but not all) of its postings may
                    // make it into the index, which'd alter the docFreq
                    // for those terms.  We could fix this by doing two
                    // passes, ie first sweep marks all del docs, and
                    // 2nd sweep does the real flush, but I suspect
                    // that'd add too much time to flush.
                    visitedDocs.Set(docID);
                    postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1);
                    if (docID < delDocLimit)
                    {
                        // Mark it deleted.  TODO: we could also skip
                        // writing its postings; this would be
                        // deterministic (just for this Term's docs).

                        // TODO: can we do this reach-around in a cleaner way????
                        if (state.LiveDocs == null)
                        {
                            state.LiveDocs = docState.docWriter.codec.LiveDocsFormat.NewLiveDocs(state.SegmentInfo.DocCount);
                        }
                        if (state.LiveDocs.Get(docID))
                        {
                            state.DelCountOnFlush++;
                            state.LiveDocs.Clear(docID);
                        }
                    }

                    totalTermFreq += termFreq;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.

                    if (readPositions || readOffsets)
                    {
                        // we did record positions (& maybe payload) and/or offsets
                        int position = 0;
                        int offset   = 0;
                        for (int j = 0; j < termFreq; j++)
                        {
                            BytesRef thisPayload;

                            if (readPositions)
                            {
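                                // Positions use the same packing as doc codes:
                                // the low bit flags a payload, the upper bits
                                // hold the position delta.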
                                int code = prox.ReadVInt32();
                                position += (int)((uint)code >> 1);

                                if ((code & 1) != 0)
                                {
                                    // this position has a payload
                                    int payloadLength = prox.ReadVInt32();

                                    if (payload == null)
                                    {
                                        payload       = new BytesRef();
                                        payload.Bytes = new byte[payloadLength];
                                    }
                                    else if (payload.Bytes.Length < payloadLength)
                                    {
                                        payload.Grow(payloadLength);
                                    }

                                    prox.ReadBytes(payload.Bytes, 0, payloadLength);
                                    payload.Length = payloadLength;
                                    thisPayload    = payload;
                                }
                                else
                                {
                                    thisPayload = null;
                                }

                                if (readOffsets)
                                {
                                    int startOffset = offset + prox.ReadVInt32();
                                    int endOffset   = startOffset + prox.ReadVInt32();
                                    if (writePositions)
                                    {
                                        if (writeOffsets)
                                        {
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset={0},endOffset={1},offset={2}", startOffset, endOffset, offset);
                                            }
                                            postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset);
                                        }
                                        else
                                        {
                                            postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                        }
                                    }
                                    offset = startOffset;
                                }
                                else if (writePositions)
                                {
                                    postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                }
                            }
                        }
                    }
                    postingsConsumer.FinishDoc();
                }
                termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
                sumTotalTermFreq += totalTermFreq;
                sumDocFreq       += docFreq;
            }

            termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality());
        }
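
For reference, here is a minimal sketch of the writer side of the doc/freq packing that the loop above decodes. The class and method names are invented for this illustration; the real encoding is produced inside FreqProxTermsWriterPerField as documents are indexed:

using Lucene.Net.Store;

internal static class DocFreqPackingSketch
{
    // Mirrors the decode in Flush: without freqs, the code is the raw docID
    // delta; with freqs, the low bit flags termFreq == 1, and larger freqs
    // follow as a separate VInt.
    public static void WriteDocAndFreq(DataOutput output, int docDelta, int termFreq, bool writeFreqs)
    {
        if (!writeFreqs)
        {
            output.WriteVInt32(docDelta);
        }
        else if (termFreq == 1)
        {
            output.WriteVInt32((docDelta << 1) | 1);
        }
        else
        {
            output.WriteVInt32(docDelta << 1);
            output.WriteVInt32(termFreq);
        }
    }
}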