Example #1
        public override void VisitDocument(int docID, StoredFieldVisitor visitor)
        {
            fieldsStream.Seek(indexReader.GetStartPointer(docID));

            int docBase   = fieldsStream.ReadVInt32();
            int chunkDocs = fieldsStream.ReadVInt32();

            if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
            }

            int numStoredFields, offset, length, totalLength;

            if (chunkDocs == 1)
            {
                numStoredFields = fieldsStream.ReadVInt32();
                offset          = 0;
                length          = fieldsStream.ReadVInt32();
                totalLength     = length;
            }
            else
            {
                int bitsPerStoredFields = fieldsStream.ReadVInt32();
                if (bitsPerStoredFields == 0)
                {
                    numStoredFields = fieldsStream.ReadVInt32();
                }
                else if (bitsPerStoredFields > 31)
                {
                    throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    long filePointer           = fieldsStream.GetFilePointer();
                    PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
                    numStoredFields = (int)(reader.Get(docID - docBase));
                    fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
                }

                int bitsPerLength = fieldsStream.ReadVInt32();
                if (bitsPerLength == 0)
                {
                    length      = fieldsStream.ReadVInt32();
                    offset      = (docID - docBase) * length;
                    totalLength = chunkDocs * length;
                }
                else if (bitsPerLength > 31)
                {
                    throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
                    int off = 0;
                    for (int i = 0; i < docID - docBase; ++i)
                    {
                        off += (int)it.Next();
                    }
                    offset = off;
                    length = (int)it.Next();
                    off   += length;
                    for (int i = docID - docBase + 1; i < chunkDocs; ++i)
                    {
                        off += (int)it.Next();
                    }
                    totalLength = off;
                }
            }

            if ((length == 0) != (numStoredFields == 0))
            {
                throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
            }
            if (numStoredFields == 0)
            {
                // nothing to do
                return;
            }

            DataInput documentInput;

            if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(chunkSize > 0);
                    Debugging.Assert(offset < chunkSize);
                }

                decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes);
                documentInput = new DataInputAnonymousInnerClassHelper(this, length);
            }
            else
            {
                BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
                decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(bytes.Length == length);
                }
                documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length);
            }

            for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++)
            {
                long      infoAndBits = documentInput.ReadVInt64();
                int       fieldNumber = (int)((long)((ulong)infoAndBits >> CompressingStoredFieldsWriter.TYPE_BITS));
                FieldInfo fieldInfo   = fieldInfos.FieldInfo(fieldNumber);

                int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits={0:x}", bits);
                }

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(documentInput, visitor, fieldInfo, bits);
                    break;

                case StoredFieldVisitor.Status.NO:
                    SkipField(documentInput, bits);
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
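
The offset/length arithmetic in the multi-doc branch above can be modeled in isolation. The sketch below (illustrative names, not part of Lucene.NET) reproduces how a document's byte offset, its length, and the chunk's total decompressed length fall out of a single pass over the per-document lengths:

using System;

static class ChunkOffsetsSketch
{
    // Stand-alone model of the packed-lengths loop above: given the
    // per-document lengths stored for one chunk and the target document's
    // index within that chunk, recover the document's byte offset, its
    // length, and the chunk's total decompressed length.
    public static (int Offset, int Length, int TotalLength) Locate(int[] lengths, int docInChunk)
    {
        int off = 0;
        for (int i = 0; i < docInChunk; i++)                  // bytes of all preceding docs
        {
            off += lengths[i];
        }
        int offset = off;
        int length = lengths[docInChunk];
        off += length;
        for (int i = docInChunk + 1; i < lengths.Length; i++) // remaining docs
        {
            off += lengths[i];
        }
        return (offset, length, off);
    }

    static void Main()
    {
        // A chunk of 4 docs with lengths 10, 0, 7, 3: doc 2 starts at byte 10.
        var (offset, length, total) = Locate(new[] { 10, 0, 7, 3 }, 2);
        Console.WriteLine($"offset={offset} length={length} total={total}"); // offset=10 length=7 total=20
    }
}
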
Example #2
            public override void AddSortedField(FieldInfo field, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd)
            {
                int      valueCount = 0;
                BytesRef lastValue  = null;

                foreach (BytesRef b in values)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(b != null);
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(b.IsValid());
                    }
                    if (valueCount > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(b.CompareTo(lastValue) > 0);
                        }
                    }
                    lastValue = BytesRef.DeepCopyOf(b);
                    valueCount++;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(valueCount <= maxDoc);
                }

                FixedBitSet seenOrds = new FixedBitSet(valueCount);

                int count = 0;

                foreach (long? v in docToOrd)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(v != null);
                    }
                    int ord = (int)v.Value;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ord >= -1 && ord < valueCount);
                    }
                    if (ord >= 0)
                    {
                        seenOrds.Set(ord);
                    }
                    count++;
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(count == maxDoc);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(seenOrds.Cardinality() == valueCount);
                }
                CheckIterator(values.GetEnumerator(), valueCount, false);
                CheckIterator(docToOrd.GetEnumerator(), maxDoc, false);
                @in.AddSortedField(field, values, docToOrd);
            }
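
The assertions above amount to three invariants: values arrive in strictly increasing order with no duplicates, every ord is either -1 (missing) or a valid value index, and every value is referenced by at least one document (the seenOrds cardinality check). A minimal stand-alone sketch of the same checks, with illustrative names:

using System;
using System.Collections.Generic;

static class SortedFieldCheckSketch
{
    // Stand-in for the assertions above: values must be strictly increasing,
    // every ord must be -1 or index a real value, and every value must be
    // referenced by at least one document.
    public static void Check(IReadOnlyList<string> values, IEnumerable<int> docToOrd)
    {
        for (int i = 1; i < values.Count; i++)
        {
            if (string.CompareOrdinal(values[i], values[i - 1]) <= 0)
                throw new InvalidOperationException($"values out of order at {i}");
        }

        var seen = new bool[values.Count];
        foreach (int ord in docToOrd)
        {
            if (ord < -1 || ord >= values.Count)
                throw new InvalidOperationException($"ord {ord} out of range");
            if (ord >= 0) seen[ord] = true;
        }
        if (Array.IndexOf(seen, false) >= 0)
            throw new InvalidOperationException("unreferenced value (seenOrds cardinality mismatch)");
    }

    static void Main()
    {
        Check(new[] { "apple", "pear" }, new[] { 1, -1, 0, 0 }); // passes
        Console.WriteLine("ok");
    }
}
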
Example #3
        public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList <SegmentCommitInfo> infos)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

                if (infos.Count == 0)
                {
                    return(new ApplyDeletesResult(false, nextGen++, null));
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CheckDeleteStats());
                }

                if (!Any())
                {
                    if (infoStream.IsEnabled("BD"))
                    {
                        infoStream.Message("BD", "applyDeletes: no deletes; skipping");
                    }
                    return(new ApplyDeletesResult(false, nextGen++, null));
                }

                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes: infos=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", infos) + " packetCount=" + updates.Count);
                }

                long gen = nextGen++;

                JCG.List <SegmentCommitInfo> infos2 = new JCG.List <SegmentCommitInfo>();
                infos2.AddRange(infos);
                infos2.Sort(sortSegInfoByDelGen);

                CoalescedUpdates coalescedUpdates = null;
                bool             anyNewDeletes    = false;

                int infosIDX = infos2.Count - 1;
                int delIDX   = updates.Count - 1;

                IList <SegmentCommitInfo> allDeleted = null;

                while (infosIDX >= 0)
                {
                    //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

                    FrozenBufferedUpdates packet = delIDX >= 0 ? updates[delIDX] : null;
                    SegmentCommitInfo     info   = infos2[infosIDX];
                    long segGen = info.BufferedDeletesGen;

                    if (packet != null && segGen < packet.DelGen)
                    {
                        //        System.out.println("  coalesce");
                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }
                        if (!packet.isSegmentPrivate)
                        {
                            /*
                             * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                             * must only be applied to segments with the same delGen.  Yet, if a segment is already deleted
                             * from the SI since it had no more documents remaining after some del packets younger than
                             * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                             * removed.
                             */
                            coalescedUpdates.Update(packet);
                        }

                        delIDX--;
                    }
                    else if (packet != null && segGen == packet.DelGen)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(packet.isSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen={0}", segGen);
                        }
                        //System.out.println("  eq");

                        // Lock order: IW -> BD -> RP
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(readerPool.InfoIsLive(info));
                        }
                        ReadersAndUpdates rld    = readerPool.Get(info, true);
                        SegmentReader     reader = rld.GetReader(IOContext.READ);
                        int  delCount            = 0;
                        bool segAllDeletes;
                        try
                        {
                            DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                            if (coalescedUpdates != null)
                            {
                                //System.out.println("    del coalesced");
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                            }
                            //System.out.println("    del exact");
                            // Don't delete by Term here; DocumentsWriterPerThread
                            // already did that on flush:
                            delCount += (int)ApplyQueryDeletes(packet.GetQueriesEnumerable(), rld, reader);
                            ApplyDocValuesUpdates(packet.numericDVUpdates, rld, reader, dvUpdates);
                            ApplyDocValuesUpdates(packet.binaryDVUpdates, rld, reader, dvUpdates);
                            if (dvUpdates.Any())
                            {
                                rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                            }
                            int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(fullDelCount <= rld.Info.Info.DocCount);
                            }
                            segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                        }
                        finally
                        {
                            rld.Release(reader);
                            readerPool.Release(rld);
                        }
                        anyNewDeletes |= delCount > 0;

                        if (segAllDeletes)
                        {
                            if (allDeleted == null)
                            {
                                allDeleted = new JCG.List <SegmentCommitInfo>();
                            }
                            allDeleted.Add(info);
                        }

                        if (infoStream.IsEnabled("BD"))
                        {
                            infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                        }

                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }

                        /*
                         * Since we are on a segment private del packet we must not
                         * update the coalescedDeletes here! We can simply advance to the
                         * next packet and seginfo.
                         */
                        delIDX--;
                        infosIDX--;
                        info.SetBufferedDeletesGen(gen);
                    }
                    else
                    {
                        //System.out.println("  gt");

                        if (coalescedUpdates != null)
                        {
                            // Lock order: IW -> BD -> RP
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(readerPool.InfoIsLive(info));
                            }
                            ReadersAndUpdates rld    = readerPool.Get(info, true);
                            SegmentReader     reader = rld.GetReader(IOContext.READ);
                            int  delCount            = 0;
                            bool segAllDeletes;
                            try
                            {
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                                if (dvUpdates.Any())
                                {
                                    rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                                }
                                int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fullDelCount <= rld.Info.Info.DocCount);
                                }
                                segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                            }
                            finally
                            {
                                rld.Release(reader);
                                readerPool.Release(rld);
                            }
                            anyNewDeletes |= delCount > 0;

                            if (segAllDeletes)
                            {
                                if (allDeleted == null)
                                {
                                    allDeleted = new JCG.List <SegmentCommitInfo>();
                                }
                                allDeleted.Add(info);
                            }

                            if (infoStream.IsEnabled("BD"))
                            {
                                infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                            }
                        }
                        info.SetBufferedDeletesGen(gen);

                        infosIDX--;
                    }
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CheckDeleteStats());
                }
                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes took " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

                return(new ApplyDeletesResult(anyNewDeletes, gen, allDeleted));
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
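
The core of the method is a backwards two-pointer merge of segments and delete packets, both ordered by delete generation. A stripped-down model of just that control flow, with plain longs standing in for SegmentCommitInfo and FrozenBufferedUpdates (illustrative only):

using System;

static class DelGenMergeSketch
{
    // Both inputs are assumed sorted ascending by generation, as in the real method.
    public static void Walk(long[] segGens, long[] packetGens)
    {
        int infosIdx = segGens.Length - 1;
        int delIdx = packetGens.Length - 1;
        while (infosIdx >= 0)
        {
            long segGen = segGens[infosIdx];
            if (delIdx >= 0 && segGen < packetGens[delIdx])
            {
                // packet is newer than this segment: fold it into the
                // coalesced set and keep scanning packets
                Console.WriteLine($"coalesce packet gen={packetGens[delIdx]}");
                delIdx--;
            }
            else if (delIdx >= 0 && segGen == packetGens[delIdx])
            {
                // equal gens: segment-private packet, apply it (plus the
                // coalesced set) and advance both pointers
                Console.WriteLine($"segment-private packet for segment gen={segGen}");
                delIdx--;
                infosIdx--;
            }
            else
            {
                // only the already-coalesced packets apply to this segment
                Console.WriteLine($"apply coalesced deletes to segment gen={segGen}");
                infosIdx--;
            }
        }
    }

    static void Main() => Walk(new long[] { 1, 3, 5 }, new long[] { 2, 3, 6 });
}
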
Example #4
        // algorithm: treat sentence snippets as miniature documents
        // we can intersect these with the postings lists via BreakIterator.preceding(offset),
        // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
        private Passage[] HighlightDoc(string field, BytesRef[] terms, int contentLength, BreakIterator bi, int doc,
                                       TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n)
        {
            PassageScorer scorer = GetScorer(field);

            if (scorer == null)
            {
                throw new NullReferenceException("PassageScorer cannot be null");
            }
            JCG.PriorityQueue <OffsetsEnum> pq = new JCG.PriorityQueue <OffsetsEnum>();
            float[] weights = new float[terms.Length];
            // initialize postings
            for (int i = 0; i < terms.Length; i++)
            {
                DocsAndPositionsEnum de = postings[i];
                int pDoc;
                if (de == EMPTY)
                {
                    continue;
                }
                else if (de == null)
                {
                    postings[i] = EMPTY; // initially
                    if (!termsEnum.SeekExact(terms[i]))
                    {
                        continue; // term not found
                    }
                    de = postings[i] = termsEnum.DocsAndPositions(null, null, DocsAndPositionsFlags.OFFSETS);
                    if (de == null)
                    {
                        // no positions available
                        throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                    }
                    pDoc = de.Advance(doc);
                }
                else
                {
                    pDoc = de.DocID;
                    if (pDoc < doc)
                    {
                        pDoc = de.Advance(doc);
                    }
                }

                if (doc == pDoc)
                {
                    weights[i] = scorer.Weight(contentLength, de.Freq);
                    de.NextPosition();
                    pq.Add(new OffsetsEnum(de, i));
                }
            }

            pq.Add(new OffsetsEnum(EMPTY, int.MaxValue)); // a sentinel for termination

            JCG.PriorityQueue <Passage> passageQueue = new JCG.PriorityQueue <Passage>(n, Comparer <Passage> .Create((left, right) =>
            {
                if (left.score < right.score)
                {
                    return(-1);
                }
                else if (left.score > right.score)
                {
                    return(1);
                }
                else
                {
                    return(left.startOffset - right.startOffset);
                }
            }));
            Passage current = new Passage();

            while (pq.TryDequeue(out OffsetsEnum off))
            {
                DocsAndPositionsEnum dp = off.dp;
                int start = dp.StartOffset;
                if (start == -1)
                {
                    throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                }
                int end = dp.EndOffset;
                // LUCENE-5166: this hit would span the content limit... however more valid
                // hits may exist (they are sorted by start). so we pretend like we never
                // saw this term, it won't cause a passage to be added to passageQueue or anything.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(EMPTY.StartOffset == int.MaxValue);
                }
                if (start < contentLength && end > contentLength)
                {
                    continue;
                }
                if (start >= current.endOffset)
                {
                    if (current.startOffset >= 0)
                    {
                        // finalize current
                        current.score *= scorer.Norm(current.startOffset);
                        // new sentence: first add 'current' to queue
                        if (passageQueue.Count == n && current.score < passageQueue.Peek().score)
                        {
                            current.Reset(); // can't compete, just reset it
                        }
                        else
                        {
                            passageQueue.Enqueue(current);
                            if (passageQueue.Count > n)
                            {
                                current = passageQueue.Dequeue();
                                current.Reset();
                            }
                            else
                            {
                                current = new Passage();
                            }
                        }
                    }
                    // if we exceed limit, we are done
                    if (start >= contentLength)
                    {
                        Passage[] passages = passageQueue.ToArray();
                        foreach (Passage p in passages)
                        {
                            p.Sort();
                        }
                        // sort in ascending order
                        ArrayUtil.TimSort(passages, Comparer <Passage> .Create((left, right) => left.startOffset - right.startOffset));
                        return(passages);
                    }
                    // advance breakiterator
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(BreakIterator.Done < 0);
                    }
                    current.startOffset = Math.Max(bi.Preceding(start + 1), 0);
                    current.endOffset   = Math.Min(bi.Next(), contentLength);
                }
                int tf = 0;
                while (true)
                {
                    tf++;
                    BytesRef term = terms[off.id];
                    if (term == null)
                    {
                        // multitermquery match, pull from payload
                        term = off.dp.GetPayload();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(term != null);
                        }
                    }
                    current.AddMatch(start, end, term);
                    if (off.pos == dp.Freq)
                    {
                        break; // removed from pq
                    }
                    else
                    {
                        off.pos++;
                        dp.NextPosition();
                        start = dp.StartOffset;
                        end   = dp.EndOffset;
                    }
                    if (start >= current.endOffset || end > contentLength)
                    {
                        pq.Enqueue(off);
                        break;
                    }
                }
                current.score += weights[off.id] * scorer.Tf(tf, current.endOffset - current.startOffset);
            }

            // Dead code but compiler disagrees:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(false);
            }
            return(null);
        }
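
The header comment's scoring formula, norm(sentenceStartOffset) * sum(weight * tf(freq)), can be demonstrated with toy numbers. The Norm/Tf formulas below are placeholders to show the shape of the computation, not the actual PassageScorer implementation:

using System;

static class PassageScoreSketch
{
    // Toy stand-ins for the scorer's norm and tf; the real formulas differ.
    static float Norm(int passageStart) => 1f + 1f / (1f + (float)Math.Log(1 + passageStart));
    static float Tf(int freq, int passageLen) => (float)Math.Sqrt(freq) / (float)Math.Sqrt(passageLen);

    static void Main()
    {
        int passageStart = 0, passageLen = 120;
        // (term weight, term frequency inside the passage); invented values
        (float Weight, int Freq)[] matches = { (0.8f, 2), (0.5f, 1) };

        float score = 0;
        foreach (var (weight, freq) in matches)
        {
            score += weight * Tf(freq, passageLen);
        }
        score *= Norm(passageStart); // norm(sentenceStartOffset) * sum(weight * tf(freq))
        Console.WriteLine($"passage score = {score:F3}");
    }
}
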
Example #5
            public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(outerInstance.termArrays.Count > 0);
                }
                AtomicReader reader   = context.AtomicReader;
                IBits        liveDocs = acceptDocs;

                PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[outerInstance.termArrays.Count];

                Terms fieldTerms = reader.GetTerms(outerInstance.field);

                if (fieldTerms is null)
                {
                    return(null);
                }

                // Reuse single TermsEnum below:
                TermsEnum termsEnum = fieldTerms.GetEnumerator();

                for (int pos = 0; pos < postingsFreqs.Length; pos++)
                {
                    Term[] terms = outerInstance.termArrays[pos];

                    DocsAndPositionsEnum postingsEnum;
                    int docFreq;

                    if (terms.Length > 1)
                    {
                        postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);

                        // coarse -- this overcounts since a given doc can
                        // have more than one term:
                        docFreq = 0;
                        for (int termIdx = 0; termIdx < terms.Length; termIdx++)
                        {
                            Term      term      = terms[termIdx];
                            TermState termState = termContexts[term].Get(context.Ord);
                            if (termState is null)
                            {
                                // Term not in reader
                                continue;
                            }
                            termsEnum.SeekExact(term.Bytes, termState);
                            docFreq += termsEnum.DocFreq;
                        }

                        if (docFreq == 0)
                        {
                            // None of the terms are in this reader
                            return(null);
                        }
                    }
                    else
                    {
                        Term      term      = terms[0];
                        TermState termState = termContexts[term].Get(context.Ord);
                        if (termState is null)
                        {
                            // Term not in reader
                            return(null);
                        }
                        termsEnum.SeekExact(term.Bytes, termState);
                        postingsEnum = termsEnum.DocsAndPositions(liveDocs, null, DocsAndPositionsFlags.NONE);

                        if (postingsEnum is null)
                        {
                            // term does exist, but has no positions
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(termsEnum.Docs(liveDocs, null, DocsFlags.NONE) != null, "termstate found but no term exists in reader");
                            }
                            throw IllegalStateException.Create("field \"" + term.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.Text + ")");
                        }

                        docFreq = termsEnum.DocFreq;
                    }

                    postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)outerInstance.positions[pos], terms);
                }

                // sort by increasing docFreq order
                if (outerInstance.slop == 0)
                {
                    ArrayUtil.TimSort(postingsFreqs);
                }

                if (outerInstance.slop == 0)
                {
                    ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context));
                    if (s.noDocs)
                    {
                        return(null);
                    }
                    else
                    {
                        return(s);
                    }
                }
                else
                {
                    return(new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context)));
                }
            }
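
The "coarse" docFreq comment above is worth unpacking: summing per-term docFreqs over a union of terms counts a document once per matching term, so it can only overestimate the true union docFreq. A small illustration with made-up postings:

using System;
using System.Collections.Generic;
using System.Linq;

static class UnionDocFreqSketch
{
    static void Main()
    {
        // Invented postings lists for two terms in one synonym position.
        var termDocs = new Dictionary<string, int[]>
        {
            ["quick"] = new[] { 1, 4, 7 },
            ["fast"]  = new[] { 4, 9 },
        };

        int coarse = termDocs.Values.Sum(docs => docs.Length);              // 5
        int exact  = termDocs.Values.SelectMany(d => d).Distinct().Count(); // 4 (doc 4 counted once)

        Console.WriteLine($"coarse={coarse} exact={exact}");
    }
}
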
Example #6
            internal virtual DocValuesConsumer GetInstance(FieldInfo field)
            {
                DocValuesFormat format = null;

                if (field.DocValuesGen != -1)
                {
                    string formatName = field.GetAttribute(PER_FIELD_FORMAT_KEY);
                    // a null formatName means the field never existed in that segment, yet has updates applied
                    if (formatName != null)
                    {
                        format = DocValuesFormat.ForName(formatName);
                    }
                }
                if (format == null)
                {
                    format = outerInstance.GetDocValuesFormatForField(field.Name);
                }
                if (format == null)
                {
                    throw new InvalidOperationException("invalid null DocValuesFormat for field=\"" + field.Name + "\"");
                }
                string formatName_ = format.Name;

                string previousValue = field.PutAttribute(PER_FIELD_FORMAT_KEY, formatName_);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(field.DocValuesGen != -1 || previousValue == null, () => "formatName=" + formatName_ + " prevValue=" + previousValue);
                }

                int? suffix = null;

                ConsumerAndSuffix consumer;

                if (!formats.TryGetValue(format, out consumer) || consumer == null)
                {
                    // First time we are seeing this format; create a new instance

                    if (field.DocValuesGen != -1)
                    {
                        string suffixAtt = field.GetAttribute(PER_FIELD_SUFFIX_KEY);
                        // even when dvGen is != -1, it can still be a new field, that never
                        // existed in the segment, and therefore doesn't have the recorded
                        // attributes yet.
                        if (suffixAtt != null)
                        {
                            suffix = Convert.ToInt32(suffixAtt, CultureInfo.InvariantCulture);
                        }
                    }

                    if (suffix == null)
                    {
                        // bump the suffix
                        if (!suffixes.TryGetValue(formatName_, out suffix) || suffix == null)
                        {
                            suffix = 0;
                        }
                        else
                        {
                            suffix = suffix + 1;
                        }
                    }
                    suffixes[formatName_] = suffix;

                    string segmentSuffix = GetFullSegmentSuffix(segmentWriteState.SegmentSuffix, GetSuffix(formatName_, Convert.ToString(suffix, CultureInfo.InvariantCulture)));
                    consumer          = new ConsumerAndSuffix();
                    consumer.Consumer = format.FieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
                    consumer.Suffix   = suffix.Value; // LUCENENET NOTE: At this point suffix cannot be null
                    formats[format]   = consumer;
                }
                else
                {
                    // we've already seen this format, so just grab its suffix
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(suffixes.ContainsKey(formatName_));
                    }
                    suffix = consumer.Suffix;
                }

                previousValue = field.PutAttribute(PER_FIELD_SUFFIX_KEY, Convert.ToString(suffix, CultureInfo.InvariantCulture));
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(field.DocValuesGen != -1 || previousValue == null, () => "suffix=" + Convert.ToString(suffix, CultureInfo.InvariantCulture) + " prevValue=" + previousValue);
                }

                // TODO: we should only provide the "slice" of FIS
                // that this DVF actually sees ...
                return(consumer.Consumer);
            }
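
The suffix bookkeeping above exists so that two consumers of the same format within one segment write to distinct files: the first instance of a format name gets suffix 0, and each later instance bumps it. A minimal model of just that logic (illustrative, not the PerFieldDocValuesFormat API):

using System;
using System.Collections.Generic;

static class FormatSuffixSketch
{
    private static readonly Dictionary<string, int> suffixes = new Dictionary<string, int>();

    // First use of a format name yields 0; each subsequent use bumps the suffix.
    public static int NextSuffix(string formatName)
    {
        suffixes[formatName] = suffixes.TryGetValue(formatName, out int s) ? s + 1 : 0;
        return suffixes[formatName];
    }

    static void Main()
    {
        Console.WriteLine(NextSuffix("Lucene45")); // 0
        Console.WriteLine(NextSuffix("Lucene45")); // 1
        Console.WriteLine(NextSuffix("Memory"));   // 0
    }
}
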
Example #7
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    IndexOptions indexOptions = fi.IndexOptions;
                    sbyte        bits         = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= Lucene40FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= Lucene40FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= Lucene40FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= Lucene40FieldInfosFormat.IS_INDEXED;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                        }
                        if (indexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene40FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt32(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    byte dv  = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
                    byte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    }
                    var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
                    output.WriteByte(val);
                    output.WriteStringStringMap(fi.Attributes);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
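
The "pack the DV types in one byte" step stores two 4-bit codes in a single byte: the norm type in the high nibble and the doc-values type in the low nibble. A self-contained sketch of the round trip, assuming 4-bit type codes:

using System;

static class DocValuesBytePackingSketch
{
    static void Main()
    {
        byte dv = 0x2, nrm = 0x1;                       // example 4-bit type codes
        var packed = (byte)(0xff & ((nrm << 4) | dv));  // same expression as the writer above

        // A reader recovers both codes by masking and shifting:
        byte dvOut  = (byte)(packed & 0x0F);
        byte nrmOut = (byte)((packed >> 4) & 0x0F);
        Console.WriteLine($"packed=0x{packed:X2} dv={dvOut} nrm={nrmOut}"); // packed=0x12 dv=2 nrm=1
    }
}
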
Example #8
            // Look for seek type 3 ("pop"): if the delta from
            // prev -> current was replacing an S with an E,
            // we must now seek to beyond that E.  this seek
            // "finishes" the dance at this character
            // position.
            private bool DoPop()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  try pop");
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(newSuffixStart <= prevTerm.Length);
                    Debugging.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0);
                }

                if (prevTerm.Length > newSuffixStart && IsNonBMPChar(prevTerm.Bytes, newSuffixStart) && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart))
                {
                    // Seek type 2 -- put 0xFF at this position:
                    scratchTerm.Bytes[newSuffixStart] = 0xff;
                    scratchTerm.Length = newSuffixStart + 1;

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("    seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString());
                    }

                    // TODO: more efficient seek?  can we simply swap
                    // the enums?
                    outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true);

                    Term t2 = termEnum.Term();

                    // We could hit EOF or different field since this
                    // was a seek "forward":
                    if (t2 != null && t2.Field == internedFieldName)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + t2.Bytes);
                        }

                        BytesRef b2 = t2.Bytes;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(b2.Offset == 0);
                        }

                        // Set newSuffixStart -- we can't use
                        // termEnum's since the above seek may have
                        // done no scanning (eg, term was precisely
                        // an index term, or was in the term seek
                        // cache):
                        scratchTerm.CopyBytes(b2);
                        SetNewSuffixStart(prevTerm, scratchTerm);

                        return(true);
                    }
                    else if (newSuffixStart != 0 || scratchTerm.Length != 0)
                    {
                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("      got term=null (or next field)");
                        }
                        newSuffixStart     = 0;
                        scratchTerm.Length = 0;
                        return(true);
                    }
                }

                return(false);
            }
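
The 0xFF trick above works because 0xFF can never appear in valid UTF-8, so a term truncated at the dance position with a 0xFF tail sorts after every real term that shares the shorter prefix. A tiny demonstration with made-up bytes:

using System;

static class PopSeekTargetSketch
{
    static void Main()
    {
        byte[] term = { 0x61, 0xee, 0x80, 0x80 }; // 'a' followed by a high-BMP char
        int pos = 1;

        // Truncate at pos and write 0xFF: the resulting seek target is
        // byte-wise greater than any term starting with the same prefix.
        byte[] target = new byte[pos + 1];
        Array.Copy(term, target, pos);
        target[pos] = 0xff;

        Console.WriteLine(BitConverter.ToString(target)); // 61-FF, sorts after 61-EE-80-80
    }
}
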
Example #9
            // Pre-flex indices store terms in UTF16 sort order, but
            // certain queries require Unicode codepoint order; this
            // method carefully seeks around surrogates to handle
            // this impedance mismatch

            private void SurrogateDance()
            {
                if (!unicodeSortOrder)
                {
                    return;
                }

                // We are invoked after TIS.next() (by UTF16 order) to
                // possibly seek to a different "next" (by unicode
                // order) term.

                // We scan only the "delta" from the last term to the
                // current term, in UTF8 bytes.  We look at 1) the bytes
                // stripped from the prior term, and then 2) the bytes
                // appended to that prior term's prefix.

                // We don't care about specific UTF8 sequences, just
                // the "category" of the UTF16 character.  Category S
                // is a high/low surrogate pair (it's non-BMP).
                // Category E is any BMP char > UNI_SUR_LOW_END (and <
                // U+FFFF). Category A is the rest (any unicode char
                // <= UNI_SUR_HIGH_START).

                // The core issue is that pre-flex indices sort the
                // characters as ASE, while flex must sort as AES.  So
                // when scanning, when we hit S, we must 1) seek
                // forward to E and enum the terms there, then 2) seek
                // back to S and enum all terms there, then 3) seek to
                // after E.  Three different seek points (1, 2, 3).

                // We can easily detect S in UTF8: if a byte has
                // prefix 11110 (0xf0), then that byte and the
                // following 3 bytes encode a single unicode codepoint
                // in S.  Similarly, we can detect E: if a byte has
                // prefix 1110111 (0xee), then that byte and the
                // following 2 bytes encode a single unicode codepoint
                // in E.

                // Note that this is really a recursive process --
                // maybe the char at pos 2 needs to dance, but any
                // point in its dance, suddenly pos 4 needs to dance
                // so you must finish pos 4 before returning to pos
                // 2.  But then during pos 4's dance maybe pos 7 needs
                // to dance, etc.  However, despite being recursive,
                // we don't need to hold any state because the state
                // can always be derived by looking at prior term &
                // current term.

                // TODO: can we avoid this copy?
                if (termEnum.Term() == null || termEnum.Term().Field != internedFieldName)
                {
                    scratchTerm.Length = 0;
                }
                else
                {
                    scratchTerm.CopyBytes(termEnum.Term().Bytes);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  dance");
                    Console.WriteLine("    prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToString()));
                    Console.WriteLine("         " + prevTerm.ToString());
                    Console.WriteLine("    term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()));
                    Console.WriteLine("         " + scratchTerm.ToString());
                }

                // this code assumes TermInfosReader/SegmentTermEnum
                // always use BytesRef.offset == 0
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(prevTerm.Offset == 0);
                    Debugging.Assert(scratchTerm.Offset == 0);
                }

                // Need to loop here because we may need to do multiple
                // pops, and possibly a continue in the end, ie:
                //
                //  cont
                //  pop, cont
                //  pop, pop, cont
                //  <nothing>
                //

                while (true)
                {
                    if (DoContinue())
                    {
                        break;
                    }
                    else
                    {
                        if (!DoPop())
                        {
                            break;
                        }
                    }
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  finish bmp ends");
                }

                DoPushes();
            }
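
The comment's category tests map onto simple lead-byte checks. Below is a minimal sketch reimplementing the IsNonBMPChar/IsHighBMPChar helpers the surrounding code calls; the 0xF0/0xEE masks follow the comment, but the real implementations may differ in detail:

using System;

static class Utf8CategorySketch
{
    private const byte NonBmpLead  = 0xf0; // lead of a 4-byte UTF-8 sequence: category S
    private const byte HighBmpLead = 0xee; // 0xEE/0xEF lead: BMP char >= U+E000, category E

    static bool IsNonBMPChar(byte[] b, int idx)  => (b[idx] & NonBmpLead) == NonBmpLead;
    static bool IsHighBMPChar(byte[] b, int idx) => (b[idx] & HighBmpLead) == HighBmpLead;

    static void Main()
    {
        byte[] s = { 0xf0, 0x90, 0x80, 0x80 }; // U+10000 (surrogate pair in UTF-16)
        byte[] e = { 0xee, 0x80, 0x80 };       // U+E000 (high BMP)
        Console.WriteLine(IsNonBMPChar(s, 0));  // True
        Console.WriteLine(IsHighBMPChar(e, 0)); // True
    }
}
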
Example #10
            public override BytesRef Next()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.next()");
                }
                if (skipNext)
                {
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  skipNext=true");
                    }
                    skipNext = false;
                    if (termEnum.Term() == null)
                    {
                        return(null);
                    }
                    else if (termEnum.Term().Field != internedFieldName)
                    {
                        // PreFlex codec interns field names:
                        return(null);
                    }
                    else
                    {
                        return(current = termEnum.Term().Bytes);
                    }
                }

                // TODO: can we use STE's prevBuffer here?
                prevTerm.CopyBytes(termEnum.Term().Bytes);

                if (termEnum.Next() && termEnum.Term().Field == internedFieldName)
                {
                    newSuffixStart = termEnum.newSuffixStart;
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  newSuffixStart=" + newSuffixStart);
                    }
                    SurrogateDance();
                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        }
                        current = null;
                    }
                    else
                    {
                        current = t.Bytes;
                    }
                    return(current);
                }
                else
                {
                    // this field is exhausted, but we have to give
                    // surrogateDance a chance to seek back:
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  force cont");
                    }
                    //newSuffixStart = prevTerm.length;
                    newSuffixStart = 0;
                    SurrogateDance();

                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        }
                        return(null);
                    }
                    else
                    {
                        current = t.Bytes;
                        return(current);
                    }
                }
            }
Example #11
            // Swap in S, in place of E:
            private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
            {
                int savLength = term.Length;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(term.Offset == 0);
                }

                // The 3 bytes starting at pos make up 1
                // unicode character:
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IsHighBMPChar(term.Bytes, pos));
                }

                // NOTE: we cannot make this assert, because
                // AutomatonQuery legitimately sends us malformed UTF8
                // (eg the UTF8 bytes with just 0xee)
                // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

                // Save the bytes && length, since we need to
                // restore this if seek "back" finds no matching
                // terms
                if (term.Bytes.Length < 4 + pos)
                {
                    term.Grow(4 + pos);
                }

                scratch[0] = (sbyte)term.Bytes[pos];
                scratch[1] = (sbyte)term.Bytes[pos + 1];
                scratch[2] = (sbyte)term.Bytes[pos + 2];

                term.Bytes[pos]     = 0xf0;
                term.Bytes[pos + 1] = 0x90;
                term.Bytes[pos + 2] = 0x80;
                term.Bytes[pos + 3] = 0x80;
                term.Length         = 4 + pos;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }

                // Seek "back":
                outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

                // Test if the term we seek'd to in fact found a
                // surrogate pair at the same position as the E:
                Term t2 = te.Term();

                // Cannot be null (or move to next field) because at
                // "worst" it'd seek to the same term we are on now,
                // unless we are being called from seek
                if (t2 == null || t2.Field != internedFieldName)
                {
                    return(false);
                }

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("      got term=" + UnicodeUtil.ToHexString(t2.Text()));
                }

                // Now test if prefix is identical and we found
                // a non-BMP char at the same position:
                BytesRef b2 = t2.Bytes;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(b2.Offset == 0);
                }

                bool matches;

                if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos))
                {
                    matches = true;
                    for (int i = 0; i < pos; i++)
                    {
                        if (term.Bytes[i] != b2.Bytes[i])
                        {
                            matches = false;
                            break;
                        }
                    }
                }
                else
                {
                    matches = false;
                }

                // Restore term:
                term.Length         = savLength;
                term.Bytes[pos]     = (byte)scratch[0];
                term.Bytes[pos + 1] = (byte)scratch[1];
                term.Bytes[pos + 2] = (byte)scratch[2];

                return(matches);
            }
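
The four bytes written above, F0 90 80 80, are the UTF-8 encoding of U+10000, the smallest non-BMP code point, which is why the seek lands on the first surrogate-pair term at that position. A quick check:

using System;
using System.Text;

static class MinNonBmpSketch
{
    static void Main()
    {
        // F0 90 80 80 decodes to U+10000, the smallest code point that
        // needs a surrogate pair in UTF-16.
        byte[] bytes = { 0xf0, 0x90, 0x80, 0x80 };
        string s = Encoding.UTF8.GetString(bytes);
        Console.WriteLine($"U+{char.ConvertToUtf32(s, 0):X}"); // U+10000
    }
}
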
Example #12
            public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(outerInstance.terms.Count > 0);
                }
                AtomicReader reader   = context.AtomicReader;
                IBits        liveDocs = acceptDocs;

                PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[outerInstance.terms.Count];

                Terms fieldTerms = reader.GetTerms(outerInstance.field);

                if (fieldTerms is null)
                {
                    return(null);
                }

                // Reuse single TermsEnum below:
                TermsEnum te = fieldTerms.GetEnumerator();

                for (int i = 0; i < outerInstance.terms.Count; i++)
                {
                    Term      t     = outerInstance.terms[i];
                    TermState state = states[i].Get(context.Ord);
                    if (state is null) // term doesn't exist in this segment
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(TermNotInReader(reader, t), "no termstate found but term exists in reader");
                        }
                        return(null);
                    }
                    te.SeekExact(t.Bytes, state);
                    DocsAndPositionsEnum postingsEnum = te.DocsAndPositions(liveDocs, null, DocsAndPositionsFlags.NONE);

                    // PhraseQuery on a field that did not index
                    // positions.
                    if (postingsEnum is null)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(te.SeekExact(t.Bytes), "termstate found but no term exists in reader");
                        }
                        // term does exist, but has no positions
                        throw IllegalStateException.Create("field \"" + t.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.Text + ")");
                    }
                    postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.DocFreq, outerInstance.positions[i], t);
                }

                // sort by increasing docFreq order
                if (outerInstance.slop == 0)
                {
                    ArrayUtil.TimSort(postingsFreqs);
                }

                if (outerInstance.slop == 0) // optimize exact case
                {
                    ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context));
                    if (s.noDocs)
                    {
                        return(null);
                    }
                    else
                    {
                        return(s);
                    }
                }
                else
                {
                    return(new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context)));
                }
            }
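
A side note on why the slop == 0 path above sorts first: visiting terms in increasing docFreq order lets the exact-phrase intersection start from the rarest term, so the candidate document set shrinks as quickly as possible. Below is a minimal standalone sketch of that ordering idea using plain sorted arrays, not the Lucene.NET postings API; all names are illustrative.

using System;
using System.Linq;

static class PhraseIntersectSketch
{
    // Each int[] is a sorted list of doc IDs for one term.
    static int[] Intersect(int[][] postings)
    {
        // Sort by docFreq (list length), rarest first -- the same idea as
        // ArrayUtil.TimSort(postingsFreqs) in the scorer above.
        var ordered = postings.OrderBy(p => p.Length).ToArray();
        var result = ordered[0];
        for (int i = 1; i < ordered.Length; i++)
        {
            result = result.Where(d => Array.BinarySearch(ordered[i], d) >= 0).ToArray();
        }
        return result;
    }

    static void Main()
    {
        int[][] postings =
        {
            new[] { 1, 2, 3, 5, 8, 13, 21 }, // frequent term
            new[] { 2, 13 },                 // rare term -- drives the intersection
        };
        Console.WriteLine(string.Join(",", Intersect(postings))); // 2,13
    }
}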
Example #13
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                string indexStreamFN  = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
                // Load the index into memory
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                }
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                long maxPointer = -1;

                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    maxPointer = indexStream.ReadVInt64();
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                fieldsStream = d.OpenInput(fieldsStreamFN, context);
                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
                    {
                        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
                    }
                }
                else
                {
                    maxPointer = fieldsStream.Length;
                }
                this.maxPointer = maxPointer;
                string codecNameDat  = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
                int    fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (version != fieldsVersion)
                {
                    throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());
                }

                if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
                {
                    chunkSize = fieldsStream.ReadVInt32();
                }
                else
                {
                    chunkSize = -1;
                }
                packedIntsVersion = fieldsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.bytes        = new BytesRef();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
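
The header handshake the constructor performs (CheckHeader, then CheckFooter or CheckEOF) boils down to: a magic number, a codec name, and a version that must fall within [VERSION_START, VERSION_CURRENT]. Here is a self-contained sketch of that pattern over BinaryReader/BinaryWriter; the layout is illustrative, not the actual CodecUtil wire format (Lucene's magic constant is 0x3FD76C17, but treat everything below as a stand-in).

using System;
using System.IO;
using System.Text;

static class HeaderSketch
{
    const int Magic = 0x3fd76c17;

    static void WriteHeader(BinaryWriter w, string codec, int version)
    {
        w.Write(Magic);
        w.Write(codec);   // length-prefixed string
        w.Write(version);
    }

    static int CheckHeader(BinaryReader r, string expectedCodec, int minVersion, int maxVersion)
    {
        if (r.ReadInt32() != Magic) throw new InvalidDataException("not a codec file");
        string codec = r.ReadString();
        if (codec != expectedCodec) throw new InvalidDataException("codec mismatch: " + codec);
        int version = r.ReadInt32();
        if (version < minVersion || version > maxVersion)
            throw new InvalidDataException("unsupported version: " + version);
        return version; // caller branches on this, as the constructor above does
    }

    static void Main()
    {
        var ms = new MemoryStream();
        WriteHeader(new BinaryWriter(ms, Encoding.UTF8, true), "SketchIdx", 2);
        ms.Position = 0;
        Console.WriteLine(CheckHeader(new BinaryReader(ms), "SketchIdx", 0, 3)); // 2
    }
}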
Example #14
            /// <summary>
            /// Go to the chunk containing the provided <paramref name="doc"/> ID.
            /// </summary>
            internal void Next(int doc)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(doc >= this.docBase + this.chunkDocs, "{0} {1} {2}", doc, this.docBase, this.chunkDocs);
                }
                fieldsStream.Seek(outerInstance.indexReader.GetStartPointer(doc));

                int docBase   = fieldsStream.ReadVInt32();
                int chunkDocs = fieldsStream.ReadVInt32();

                if (docBase < this.docBase + this.chunkDocs || docBase + chunkDocs > outerInstance.numDocs)
                {
                    throw new CorruptIndexException($"Corrupted: current docBase={this.docBase}, current numDocs={this.chunkDocs}, new docBase={docBase}, new numDocs={chunkDocs} (resource={fieldsStream})");
                }
                this.docBase   = docBase;
                this.chunkDocs = chunkDocs;

                if (chunkDocs > numStoredFields.Length)
                {
                    int newLength = ArrayUtil.Oversize(chunkDocs, 4);
                    numStoredFields = new int[newLength];
                    lengths         = new int[newLength];
                }

                if (chunkDocs == 1)
                {
                    numStoredFields[0] = fieldsStream.ReadVInt32();
                    lengths[0]         = fieldsStream.ReadVInt32();
                }
                else
                {
                    int bitsPerStoredFields = fieldsStream.ReadVInt32();
                    if (bitsPerStoredFields == 0)
                    {
                        Arrays.Fill(numStoredFields, 0, chunkDocs, fieldsStream.ReadVInt32());
                    }
                    else if (bitsPerStoredFields > 31)
                    {
                        throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                    }
                    else
                    {
                        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerStoredFields, 1);
                        for (int i = 0; i < chunkDocs; ++i)
                        {
                            numStoredFields[i] = (int)it.Next();
                        }
                    }

                    int bitsPerLength = fieldsStream.ReadVInt32();
                    if (bitsPerLength == 0)
                    {
                        Arrays.Fill(lengths, 0, chunkDocs, fieldsStream.ReadVInt32());
                    }
                    else if (bitsPerLength > 31)
                    {
                        throw new CorruptIndexException($"bitsPerLength={bitsPerLength}");
                    }
                    else
                    {
                        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerLength, 1);
                        for (int i = 0; i < chunkDocs; ++i)
                        {
                            lengths[i] = (int)it.Next();
                        }
                    }
                }
            }
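
When bitsPerStoredFields or bitsPerLength is non-zero, the chunk stores one fixed-width value per document, packed back to back (which is why values wider than 31 bits are rejected above). A standalone sketch of that bit-unpacking, big-endian with the first bit in the high bit of the first byte; it mirrors the role PackedInt32s.GetReaderIteratorNoHeader plays here, but is not the actual PACKED on-disk format.

using System;

static class PackedSketch
{
    // Read `count` values of `bitsPerValue` bits each (bitsPerValue <= 31).
    static int[] Unpack(byte[] block, int count, int bitsPerValue)
    {
        var values = new int[count];
        int bitPos = 0;
        for (int i = 0; i < count; i++)
        {
            int v = 0;
            for (int b = 0; b < bitsPerValue; b++, bitPos++)
            {
                int bit = (block[bitPos >> 3] >> (7 - (bitPos & 7))) & 1;
                v = (v << 1) | bit;
            }
            values[i] = v;
        }
        return values;
    }

    static void Main()
    {
        byte[] block = { 0xAB, 0xC1, 0x23 }; // two 12-bit values: 0xABC, 0x123
        Console.WriteLine(string.Join(",", Unpack(block, 2, 12))); // 2748,291
    }
}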
Example #15
        private void TestOne(Random r, ByteRunAutomaton a, int startCode, int endCode, int iters)
        {
            // Verify correct ints are accepted
            int  nonSurrogateCount;
            bool ovSurStart;

            if (endCode < UnicodeUtil.UNI_SUR_HIGH_START || startCode > UnicodeUtil.UNI_SUR_LOW_END)
            {
                // no overlap w/ surrogates
                nonSurrogateCount = endCode - startCode + 1;
                ovSurStart        = false;
            }
            else if (IsSurrogate(startCode))
            {
                // start of range overlaps surrogates
                nonSurrogateCount = endCode - startCode + 1 - (UnicodeUtil.UNI_SUR_LOW_END - startCode + 1);
                ovSurStart        = false;
            }
            else if (IsSurrogate(endCode))
            {
                // end of range overlaps surrogates
                ovSurStart        = true;
                nonSurrogateCount = endCode - startCode + 1 - (endCode - UnicodeUtil.UNI_SUR_HIGH_START + 1);
            }
            else
            {
                // range completely subsumes surrogates
                ovSurStart        = true;
                nonSurrogateCount = endCode - startCode + 1 - (UnicodeUtil.UNI_SUR_LOW_END - UnicodeUtil.UNI_SUR_HIGH_START + 1);
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(nonSurrogateCount > 0);
            }

            for (int iter = 0; iter < iters; iter++)
            {
                // pick random code point in-range

                int code = startCode + r.Next(nonSurrogateCount);
                if (IsSurrogate(code))
                {
                    if (ovSurStart)
                    {
                        code = UnicodeUtil.UNI_SUR_LOW_END + 1 + (code - UnicodeUtil.UNI_SUR_HIGH_START);
                    }
                    else
                    {
                        code = UnicodeUtil.UNI_SUR_LOW_END + 1 + (code - startCode);
                    }
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(code >= startCode && code <= endCode, () => "code=" + code + " start=" + startCode + " end=" + endCode);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!IsSurrogate(code));
                }

                Assert.IsTrue(Matches(a, code), "DFA for range " + startCode + "-" + endCode + " failed to match code=" + code);
            }

            // Verify invalid ints are not accepted
            int invalidRange = MAX_UNICODE - (endCode - startCode + 1);

            if (invalidRange > 0)
            {
                for (int iter = 0; iter < iters; iter++)
                {
                    int x = TestUtil.NextInt32(r, 0, invalidRange - 1);
                    int code;
                    if (x >= startCode)
                    {
                        code = endCode + 1 + x - startCode;
                    }
                    else
                    {
                        code = x;
                    }
                    if ((code >= UnicodeUtil.UNI_SUR_HIGH_START && code <= UnicodeUtil.UNI_SUR_HIGH_END) || (code >= UnicodeUtil.UNI_SUR_LOW_START && code <= UnicodeUtil.UNI_SUR_LOW_END))
                    {
                        iter--;
                        continue;
                    }
                    Assert.IsFalse(Matches(a, code), "DFA for range " + startCode + "-" + endCode + " matched invalid code=" + code);
                }
            }
        }
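
A worked example of the nonSurrogateCount arithmetic above, with the UnicodeUtil constants inlined (the UTF-16 surrogate block is U+D800..U+DFFF); this is a standalone sketch, not part of the test class.

using System;

static class SurrogateCountSketch
{
    static void Main()
    {
        const int SurHighStart = 0xD800, SurLowEnd = 0xDFFF;
        int startCode = 0xD000, endCode = 0xE000;

        // This range completely subsumes the surrogates -> the last branch above:
        int nonSurrogateCount = endCode - startCode + 1 - (SurLowEnd - SurHighStart + 1);
        Console.WriteLine(nonSurrogateCount); // 0x801 = 2049 sampleable code points

        // Remapping: a sampled "code" landing in the surrogate block is shifted
        // past it, exactly as the ovSurStart branch does.
        int code = 0xD805; // would be a surrogate
        code = SurLowEnd + 1 + (code - SurHighStart);
        Console.WriteLine($"0x{code:X}"); // 0xE005, a valid non-surrogate
    }
}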
Example #16
            // Look for seek type 1 ("push"): if the newly added
            // suffix contains any S, we must try to seek to the
            // corresponding E.  If we find a match, we go there;
            // else we keep looking for additional S's in the new
            // suffix.  this "starts" the dance, at this character
            // position:
            private void DoPushes()
            {
                int upTo = newSuffixStart;

                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("  try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.Length);
                }

                while (upTo < scratchTerm.Length)
                {
                    if (IsNonBMPChar(scratchTerm.Bytes, upTo) && (upTo > newSuffixStart || (upTo >= prevTerm.Length || (!IsNonBMPChar(prevTerm.Bytes, upTo) && !IsHighBMPChar(prevTerm.Bytes, upTo)))))
                    {
                        // A non-BMP char (4 bytes UTF8) starts here:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(scratchTerm.Length >= upTo + 4);
                        }

                        int savLength = scratchTerm.Length;
                        scratch[0] = (sbyte)scratchTerm.Bytes[upTo];
                        scratch[1] = (sbyte)scratchTerm.Bytes[upTo + 1];
                        scratch[2] = (sbyte)scratchTerm.Bytes[upTo + 2];

                        scratchTerm.Bytes[upTo]     = (byte)UTF8_HIGH_BMP_LEAD;
                        scratchTerm.Bytes[upTo + 1] = 0x80;
                        scratchTerm.Bytes[upTo + 2] = 0x80;
                        scratchTerm.Length          = upTo + 3;

                        if (DEBUG_SURROGATES)
                        {
                            Console.WriteLine("    try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length);
                        }

                        // Seek "forward":
                        // TODO: more efficient seek?
                        outerInstance.TermsDict.SeekEnum(seekTermEnum, new Term(fieldInfo.Name, scratchTerm), true);

                        scratchTerm.Bytes[upTo]     = (byte)scratch[0];
                        scratchTerm.Bytes[upTo + 1] = (byte)scratch[1];
                        scratchTerm.Bytes[upTo + 2] = (byte)scratch[2];
                        scratchTerm.Length          = savLength;

                        // Did we find a match?
                        Term t2 = seekTermEnum.Term();

                        if (DEBUG_SURROGATES)
                        {
                            if (t2 == null)
                            {
                                Console.WriteLine("      hit term=null");
                            }
                            else
                            {
                                Console.WriteLine("      hit term=" + UnicodeUtil.ToHexString(t2.Text()) + " " + (t2 == null ? null : t2.Bytes));
                            }
                        }

                        // Since this was a seek "forward", we could hit
                        // EOF or a different field:
                        bool matches;

                        if (t2 != null && t2.Field == internedFieldName)
                        {
                            BytesRef b2 = t2.Bytes;
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(b2.Offset == 0);
                            }
                            if (b2.Length >= upTo + 3 && IsHighBMPChar(b2.Bytes, upTo))
                            {
                                matches = true;
                                for (int i = 0; i < upTo; i++)
                                {
                                    if (scratchTerm.Bytes[i] != b2.Bytes[i])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }
                            }
                            else
                            {
                                matches = false;
                            }
                        }
                        else
                        {
                            matches = false;
                        }

                        if (matches)
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("      matches!");
                            }

                            // OK seek "back"
                            // TODO: more efficient seek?
                            outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), true);

                            scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);

                            // +3 because we don't need to check the char
                            // at upTo: we know it's > BMP
                            upTo += 3;

                            // NOTE: we keep iterating, now, since this
                            // can easily "recurse".  Ie, after seeking
                            // forward at a certain char position, we may
                            // find another surrogate in our [new] suffix
                            // and must then do another seek (recurse)
                        }
                        else
                        {
                            upTo++;
                        }
                    }
                    else
                    {
                        upTo++;
                    }
                }
            }
Example #17
        internal override int Transition(int absState, int position, int vector)
        {
            // null absState should never be passed in
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(absState != -1);
            }

            // decode absState -> state, offset
            int state  = absState / (m_w + 1);
            int offset = absState % (m_w + 1);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(offset >= 0);
            }

            if (position == m_w)
            {
                if (state < 3)
                {
                    int loc = vector * 3 + state;
                    offset += Unpack(offsetIncrs0, loc, 1);
                    state   = Unpack(toStates0, loc, 2) - 1;
                }
            }
            else if (position == m_w - 1)
            {
                if (state < 5)
                {
                    int loc = vector * 5 + state;
                    offset += Unpack(offsetIncrs1, loc, 1);
                    state   = Unpack(toStates1, loc, 3) - 1;
                }
            }
            else if (position == m_w - 2)
            {
                if (state < 11)
                {
                    int loc = vector * 11 + state;
                    offset += Unpack(offsetIncrs2, loc, 2);
                    state   = Unpack(toStates2, loc, 4) - 1;
                }
            }
            else if (position == m_w - 3)
            {
                if (state < 21)
                {
                    int loc = vector * 21 + state;
                    offset += Unpack(offsetIncrs3, loc, 2);
                    state   = Unpack(toStates3, loc, 5) - 1;
                }
            }
            else if (position == m_w - 4)
            {
                if (state < 30)
                {
                    int loc = vector * 30 + state;
                    offset += Unpack(offsetIncrs4, loc, 3);
                    state   = Unpack(toStates4, loc, 5) - 1;
                }
            }
            else
            {
                if (state < 30)
                {
                    int loc = vector * 30 + state;
                    offset += Unpack(offsetIncrs5, loc, 3);
                    state   = Unpack(toStates5, loc, 5) - 1;
                }
            }

            if (state == -1)
            {
                // null state
                return(-1);
            }
            else
            {
                // translate back to abs
                return(state * (m_w + 1) + offset);
            }
        }
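
The absState packing decoded at the top of Transition is a plain mixed-radix encoding: absState = state * (m_w + 1) + offset, with offset ranging over 0..m_w. A tiny round-trip sketch of just that packing (names illustrative):

using System;

static class AbsStateSketch
{
    static int Encode(int state, int offset, int w) => state * (w + 1) + offset;

    static (int state, int offset) Decode(int absState, int w)
        => (absState / (w + 1), absState % (w + 1));

    static void Main()
    {
        int w = 7; // word length, so offsets range over 0..w
        int abs = Encode(state: 4, offset: 3, w: w);
        var (state, offset) = Decode(abs, w);
        Console.WriteLine($"{abs} -> state={state} offset={offset}"); // 35 -> state=4 offset=3
    }
}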
Example #18
            public override SeekStatus SeekCeil(BytesRef term)
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }
                skipNext = false;
                TermInfosReader tis = outerInstance.TermsDict;
                Term            t0  = new Term(fieldInfo.Name, term);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termEnum != null);
                }

                tis.SeekEnum(termEnum, t0, false);

                Term t = termEnum.Term();

                if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes))
                {
                    // If we found an exact match, no need to do the
                    // surrogate dance
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek exact match");
                    }
                    current = t.Bytes;
                    return(SeekStatus.FOUND);
                }
                else if (t == null || t.Field != internedFieldName)
                {
                    // TODO: maybe we can handle this like the next()
                    // into null?  set term as prevTerm then dance?

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit EOF");
                    }

                    // We hit EOF; try end-case surrogate dance: if we
                    // find an E, try swapping in S, backwards:
                    scratchTerm.CopyBytes(term);

                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(scratchTerm.Offset == 0);
                    }

                    for (int i = scratchTerm.Length - 1; i >= 0; i--)
                    {
                        if (IsHighBMPChar(scratchTerm.Bytes, i))
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("    found E pos=" + i + "; try seek");
                            }

                            if (SeekToNonBMP(seekTermEnum, scratchTerm, i))
                            {
                                scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
                                outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

                                newSuffixStart = 1 + i;

                                DoPushes();

                                // Found a match
                                // TODO: faster seek?
                                current = termEnum.Term().Bytes;
                                return(SeekStatus.NOT_FOUND);
                            }
                        }
                    }

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek END");
                    }

                    current = null;
                    return(SeekStatus.END);
                }
                else
                {
                    // We found a non-exact but non-null term; this one
                    // is fun -- just treat it like next, by pretending
                    // requested term was prev:
                    prevTerm.CopyBytes(term);

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
                    }

                    BytesRef br = t.Bytes;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(br.Offset == 0);
                    }

                    SetNewSuffixStart(term, br);

                    SurrogateDance();

                    Term t2 = termEnum.Term();
                    if (t2 == null || t2.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        }
                        current = null;
                        return(SeekStatus.END);
                    }
                    else
                    {
                        current = t2.Bytes;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, () => "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
                        }
                        return(SeekStatus.NOT_FOUND);
                    }
                }
            }
Example #19
        private void Build(State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto)
        {
            // Break into start, middle, end:
            if (startUTF8.ByteAt(upto) == endUTF8.ByteAt(upto))
            {
                // Degen case: lead with the same byte:
                if (upto == startUTF8.len - 1 && upto == endUTF8.len - 1)
                {
                    // Super degen: just single edge, one UTF8 byte:
                    start.AddTransition(new Transition(startUTF8.ByteAt(upto), endUTF8.ByteAt(upto), end));
                    return;
                }
                else
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(startUTF8.len > upto + 1);
                        Debugging.Assert(endUTF8.len > upto + 1);
                    }
                    State n = NewUTF8State();

                    // Single value leading edge
                    start.AddTransition(new Transition(startUTF8.ByteAt(upto), n)); // type=single

                    // Recurse for the rest
                    Build(n, end, startUTF8, endUTF8, 1 + upto);
                }
            }
            else if (startUTF8.len == endUTF8.len)
            {
                if (upto == startUTF8.len - 1)
                {
                    start.AddTransition(new Transition(startUTF8.ByteAt(upto), endUTF8.ByteAt(upto), end)); // type=startend
                }
                else
                {
                    Start(start, end, startUTF8, upto, false);
                    if (endUTF8.ByteAt(upto) - startUTF8.ByteAt(upto) > 1)
                    {
                        // There is a middle
                        All(start, end, startUTF8.ByteAt(upto) + 1, endUTF8.ByteAt(upto) - 1, startUTF8.len - upto - 1);
                    }
                    End(start, end, endUTF8, upto, false);
                }
            }
            else
            {
                // start
                Start(start, end, startUTF8, upto, true);

                // possibly middle, spanning multiple num bytes
                int byteCount = 1 + startUTF8.len - upto;
                int limit     = endUTF8.len - upto;
                while (byteCount < limit)
                {
                    // wasteful: we only need the first byte, and we should
                    // statically encode this first byte:
                    tmpUTF8a.Set(startCodes[byteCount - 1]);
                    tmpUTF8b.Set(endCodes[byteCount - 1]);
                    All(start, end, tmpUTF8a.ByteAt(0), tmpUTF8b.ByteAt(0), tmpUTF8a.len - 1);
                    byteCount++;
                }

                // end
                End(start, end, endUTF8, upto, true);
            }
        }
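
Build splits a code point range on the boundaries where the UTF-8 byte length changes (0x80, 0x800, 0x10000), so any [start, end] range decomposes into at most four same-length subranges handled by Start/All/End. A standalone sketch of that boundary walk, under the assumption that only the length thresholds matter here:

using System;

static class Utf8LenSketch
{
    static int Utf8Len(int cp) =>
        cp < 0x80 ? 1 : cp < 0x800 ? 2 : cp < 0x10000 ? 3 : 4;

    static void Main()
    {
        int start = 0x7F0, end = 0x10010; // spans 2-, 3- and 4-byte encodings
        int lo = start;
        while (lo <= end)
        {
            int len = Utf8Len(lo);
            // hi = last code point that still encodes with `len` bytes
            int hi = Math.Min(end, len == 1 ? 0x7F : len == 2 ? 0x7FF : len == 3 ? 0xFFFF : 0x10FFFF);
            Console.WriteLine($"U+{lo:X4}..U+{hi:X4}: {len} bytes");
            lo = hi + 1;
        }
    }
}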
Example #20
        internal void MarkForFullFlush()
        {
            DocumentsWriterDeleteQueue flushingQueue;

            lock (this)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!fullFlush, "called DWFC#markForFullFlush() while full flush is still running");
                    Debugging.Assert(fullFlushBuffer.Count == 0, () => "full flush buffer should be empty: " + fullFlushBuffer);
                }
                fullFlush     = true;
                flushingQueue = documentsWriter.deleteQueue;
                // Set a new delete queue - all subsequent DWPT will use this queue until
                // we do another full flush
                DocumentsWriterDeleteQueue newQueue = new DocumentsWriterDeleteQueue(flushingQueue.generation + 1);
                documentsWriter.deleteQueue = newQueue;
            }
            int limit = perThreadPool.NumThreadStatesActive;

            for (int i = 0; i < limit; i++)
            {
                ThreadState next = perThreadPool.GetThreadState(i);
                next.@Lock();
                try
                {
                    if (!next.IsInitialized)
                    {
                        if (closed && next.IsActive)
                        {
                            perThreadPool.DeactivateThreadState(next);
                        }
                        continue;
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(next.dwpt.deleteQueue == flushingQueue || next.dwpt.deleteQueue == documentsWriter.deleteQueue, () => " flushingQueue: " + flushingQueue + " currentqueue: " + documentsWriter.deleteQueue + " perThread queue: " + next.dwpt.deleteQueue + " numDocsInRam: " + next.dwpt.NumDocsInRAM);
                    }
                    if (next.dwpt.deleteQueue != flushingQueue)
                    {
                        // this one is already a new DWPT
                        continue;
                    }
                    AddFlushableState(next);
                }
                finally
                {
                    next.Unlock();
                }
            }
            lock (this)
            {
                /* Make sure all DWPTs that were concurrently marked as pending
                 * and moved to blocked are moved over to the flushQueue. There is
                 * a chance this happens since we mark DWPTs for full flush without
                 * blocking indexing. */
                PruneBlockedQueue(flushingQueue);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(AssertBlockedFlushes(documentsWriter.deleteQueue));
                }
                //FlushQueue.AddAll(FullFlushBuffer);
                foreach (var dwpt in fullFlushBuffer)
                {
                    flushQueue.Enqueue(dwpt);
                }
                fullFlushBuffer.Clear();
                UpdateStallState();
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(AssertActiveDeleteQueue(documentsWriter.deleteQueue));
            }
        }
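
The core of the handoff above: swap in a fresh delete queue while holding the lock, then drain the per-thread states against the old queue outside it, so indexing threads are never fully blocked. A minimal sketch of that pattern (hypothetical names, with strings standing in for buffered deletes):

using System;
using System.Collections.Generic;

class FullFlushSketch
{
    private readonly object syncLock = new object();
    private Queue<string> deleteQueue = new Queue<string>(); // live queue indexing threads append to

    public void Delete(string term) { lock (syncLock) deleteQueue.Enqueue(term); }

    public Queue<string> MarkForFullFlush()
    {
        Queue<string> flushingQueue;
        lock (syncLock)
        {
            // Everything buffered so far belongs to this flush; later
            // deletes land in the fresh queue, so indexing never blocks.
            flushingQueue = deleteQueue;
            deleteQueue = new Queue<string>();
        }
        return flushingQueue; // drained outside the lock, like the loop above
    }

    static void Main()
    {
        var w = new FullFlushSketch();
        w.Delete("a"); w.Delete("b");
        Console.WriteLine(w.MarkForFullFlush().Count); // 2
        w.Delete("c");
        Console.WriteLine(w.MarkForFullFlush().Count); // 1
    }
}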
Example #21
        private IDictionary <int, object> HighlightField(string field, string[] contents, BreakIterator bi, BytesRef[] terms, int[] docids, IList <AtomicReaderContext> leaves, int maxPassages, Query query)
        {
            IDictionary <int, object> highlights = new Dictionary <int, object>();

            PassageFormatter fieldFormatter = GetFormatter(field);

            if (fieldFormatter == null)
            {
                throw new NullReferenceException("PassageFormatter cannot be null");
            }

            // check if we should do any multiterm processing
            Analyzer analyzer = GetIndexAnalyzer(field);

            CharacterRunAutomaton[] automata = Arrays.Empty<CharacterRunAutomaton>();
            if (analyzer != null)
            {
                automata = MultiTermHighlighting.ExtractAutomata(query, field);
            }

            // resize 'terms', where the last term is the multiterm matcher
            if (automata.Length > 0)
            {
                BytesRef[] newTerms = new BytesRef[terms.Length + 1];
                System.Array.Copy(terms, 0, newTerms, 0, terms.Length);
                terms = newTerms;
            }

            // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
            // otherwise, we will just advance() existing enums to the new document in the same segment.
            DocsAndPositionsEnum[] postings = null;
            TermsEnum termsEnum             = null;
            int       lastLeaf = -1;

            for (int i = 0; i < docids.Length; i++)
            {
                string content = contents[i];
                if (content.Length == 0)
                {
                    continue; // nothing to do
                }
                bi.SetText(content);
                int doc  = docids[i];
                int leaf = ReaderUtil.SubIndex(doc, leaves);
                AtomicReaderContext subContext = leaves[leaf];
                AtomicReader        r          = subContext.AtomicReader;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(leaf >= lastLeaf);                           // increasing order
                }
                // if the segment has changed, we must initialize new enums.
                if (leaf != lastLeaf)
                {
                    Terms t = r.GetTerms(field);
                    if (t != null)
                    {
                        termsEnum = t.GetEnumerator();
                        postings  = new DocsAndPositionsEnum[terms.Length];
                    }
                }
                if (termsEnum == null)
                {
                    continue; // no terms for this field, nothing to do
                }

                // if there are multi-term matches, we have to initialize the "fake" enum for each document
                if (automata.Length > 0)
                {
                    DocsAndPositionsEnum dp = MultiTermHighlighting.GetDocsEnum(analyzer.GetTokenStream(field, content), automata);
                    dp.Advance(doc - subContext.DocBase);
                    postings[terms.Length - 1] = dp; // last term is the multiterm matcher
                }

                Passage[] passages = HighlightDoc(field, terms, content.Length, bi, doc - subContext.DocBase, termsEnum, postings, maxPassages);

                if (passages.Length == 0)
                {
                    // no passages were returned, so ask for a default summary
                    passages = GetEmptyHighlight(field, bi, maxPassages);
                }

                if (passages.Length > 0)
                {
                    highlights[doc] = fieldFormatter.Format(passages, content);
                }

                lastLeaf = leaf;
            }

            return(highlights);
        }
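
ReaderUtil.SubIndex(doc, leaves) maps a global doc ID to the leaf (segment) that owns it; conceptually it is a binary search over each leaf's docBase. A standalone sketch under that assumption:

using System;

static class SubIndexSketch
{
    static int SubIndex(int doc, int[] docBases)
    {
        int idx = Array.BinarySearch(docBases, doc);
        // Exact hit: doc is the first doc of that leaf. Otherwise BinarySearch
        // returns ~insertionPoint; the owning leaf is the one just before it.
        return idx >= 0 ? idx : ~idx - 1;
    }

    static void Main()
    {
        int[] docBases = { 0, 100, 250 }; // three leaves
        Console.WriteLine(SubIndex(99, docBases));  // 0
        Console.WriteLine(SubIndex(100, docBases)); // 1
        Console.WriteLine(SubIndex(300, docBases)); // 2
    }
}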
Example #22
            public override int NextPosition()
            {
                if (lazyProxPointer != -1)
                {
                    proxIn.Seek(lazyProxPointer);
                    lazyProxPointer = -1;
                }

                if (payloadPending && payloadLength > 0)
                {
                    // payload of last position was never retrieved -- skip it
                    proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    payloadPending = false;
                }

                // scan over any docs that were iterated without their positions
                while (posPendingCount > freq)
                {
                    int code = proxIn.ReadVInt32();

                    if (storePayloads)
                    {
                        if ((code & 1) != 0)
                        {
                            // new payload length
                            payloadLength = proxIn.ReadVInt32();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(payloadLength >= 0);
                            }
                        }
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadLength != -1);
                        }
                    }

                    if (storeOffsets)
                    {
                        if ((proxIn.ReadVInt32() & 1) != 0)
                        {
                            // new offset length
                            offsetLength = proxIn.ReadVInt32();
                        }
                    }

                    if (storePayloads)
                    {
                        proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    }

                    posPendingCount--;
                    position       = 0;
                    startOffset    = 0;
                    payloadPending = false;
                    //System.out.println("StandardR.D&PE skipPos");
                }

                // read next position
                if (payloadPending && payloadLength > 0)
                {
                    // payload wasn't retrieved for last position
                    proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }

                int code_ = proxIn.ReadVInt32();

                if (storePayloads)
                {
                    if ((code_ & 1) != 0)
                    {
                        // new payload length
                        payloadLength = proxIn.ReadVInt32();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadLength >= 0);
                        }
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(payloadLength != -1);
                    }

                    payloadPending = true;
                    code_          = code_.TripleShift(1);
                }
                position += code_;

                if (storeOffsets)
                {
                    int offsetCode = proxIn.ReadVInt32();
                    if ((offsetCode & 1) != 0)
                    {
                        // new offset length
                        offsetLength = proxIn.ReadVInt32();
                    }
                    startOffset += offsetCode.TripleShift(1);
                }

                posPendingCount--;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(posPendingCount >= 0, "NextPosition() was called too many times (more than Freq times) posPendingCount={0}", posPendingCount);
                }

                //System.out.println("StandardR.D&PE nextPos   return pos=" + position);
                return(position);
            }
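
The decode loop above relies on a low-bit-flag convention in the prox stream: each VInt is the position delta shifted left one bit, and a set low bit means a new payload length follows. A self-contained sketch of just that decoding, over an illustrative in-memory encoding rather than the on-disk .prx format:

using System;

static class ProxDecodeSketch
{
    static void Main()
    {
        // Encoded stream for positions 5, 9, 9: (delta << 1 | payloadFlag) codes,
        // with a payload length accompanying each code whose flag is set.
        var codes = new (int code, int newPayloadLen)[] { (5 << 1 | 1, 3), (4 << 1, 0), (0 << 1 | 1, 7) };

        int position = 0, payloadLength = 0;
        foreach (var (code, newPayloadLen) in codes)
        {
            if ((code & 1) != 0)
                payloadLength = newPayloadLen; // low bit set -> new payload length
            position += code >> 1;             // same as code.TripleShift(1) for non-negative codes
            Console.WriteLine($"pos={position} payloadLen={payloadLength}");
        }
        // prints: pos=5 payloadLen=3 / pos=9 payloadLen=3 / pos=9 payloadLen=7
    }
}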
Example #23
        /// <summary>
        /// Safe (but slowish) default method to write every
        /// vector field in the document.
        /// </summary>
        protected void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Count;

            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
                numFields = 0;
                //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();)
                foreach (string it in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum            termsEnum            = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;

            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName={0} fieldName={1}", lastFieldName, fieldName);
                }
                lastFieldName = fieldName;

                Terms terms = vectors.GetTerms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions;
                bool hasOffsets   = terms.HasOffsets;
                bool hasPayloads  = terms.HasPayloads;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!hasPayloads || hasPositions);
                }

                int numTerms = (int)terms.Count;
                if (numTerms == -1)
                {
                    // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
                    numTerms  = 0;
                    termsEnum = terms.GetEnumerator(termsEnum);
                    while (termsEnum.MoveNext())
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.GetEnumerator(termsEnum);

                int termCount = 0;
                while (termsEnum.MoveNext())
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq;

                    StartTerm(termsEnum.Term, freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docsAndPositionsEnum != null);
                        }

                        int docID = docsAndPositionsEnum.NextDoc();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                            Debugging.Assert(docsAndPositionsEnum.Freq == freq);
                        }

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos         = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset;
                            int endOffset   = docsAndPositionsEnum.EndOffset;

                            BytesRef payload = docsAndPositionsEnum.GetPayload();

                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(!hasPositions || pos >= 0);
                            }
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termCount == numTerms);
                }
                FinishField();
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fieldCount == numFields);
            }
            FinishDocument();
        }
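
The merge loop above depends on a strict call-ordering contract on the writer: StartDocument, then per field StartField, per term StartTerm and AddPosition, each closed by its Finish* call. A toy writer that makes the nesting visible (hypothetical class, not the codec API):

using System;

class ConsoleVectorsWriter
{
    public void StartDocument(int numFields) => Console.WriteLine($"doc({numFields} fields)");
    public void StartField(string name, int numTerms) => Console.WriteLine($"  field {name} ({numTerms} terms)");
    public void StartTerm(string term, int freq) => Console.WriteLine($"    term {term} x{freq}");
    public void AddPosition(int pos) => Console.WriteLine($"      pos {pos}");
    public void FinishTerm() { }
    public void FinishField() { }
    public void FinishDocument() => Console.WriteLine("done");

    static void Main()
    {
        var w = new ConsoleVectorsWriter();
        w.StartDocument(1);
        w.StartField("body", 1);
        w.StartTerm("lucene", 2);
        w.AddPosition(3); w.AddPosition(17);
        w.FinishTerm(); w.FinishField(); w.FinishDocument();
    }
}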
Example #24
        public override void Warm(AtomicReader reader)
        {
            long startTime      = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.IsIndexed)
                {
                    reader.GetTerms(info.Name);
                    indexedCount++;

                    if (info.HasNorms)
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues)
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(false);                               // unknown dv type
                        }
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (infoStream.IsEnabled("SMSW"))
            {
                infoStream.Message("SMSW",
                                   "Finished warming segment: " + reader +
                                   ", indexed=" + indexedCount +
                                   ", docValues=" + docValuesCount +
                                   ", norms=" + normsCount +
                                   ", time=" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startTime)); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
        }
Example #25
        // for debugging

        /*
         * private String toString(BytesRef b) {
         * try {
         *  return b.utf8ToString() + " " + b;
         * } catch (Throwable t) {
         *  return b.toString();
         * }
         * }
         */

        /// <summary>
        /// It's OK to add the same input twice in a row with
        /// different outputs, as long as the outputs type implements the merge
        /// method. Note that the input is fully consumed after this
        /// method returns (so the caller is free to reuse it), but the
        /// output is not.  So if your outputs are changeable (e.g.
        /// <see cref="ByteSequenceOutputs"/> or
        /// <see cref="Int32SequenceOutputs"/>) then you cannot reuse them across
        /// calls.
        /// </summary>
        public virtual void Add(Int32sRef input, T output)
        {
            /*
             * if (DEBUG) {
             * BytesRef b = new BytesRef(input.length);
             * for(int x=0;x<input.length;x++) {
             *  b.bytes[x] = (byte) input.ints[x];
             * }
             * b.length = input.length;
             * if (output == NO_OUTPUT) {
             *  System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
             * } else {
             *  System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
             * }
             * }
             */

            // De-dup NO_OUTPUT since it must be a singleton:
            if (output.Equals(NO_OUTPUT))
            {
                output = NO_OUTPUT;
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(lastInput.Length == 0 || input.CompareTo(lastInput) >= 0, () => "inputs are added out of order lastInput=" + lastInput + " vs input=" + input);
                Debugging.Assert(ValidOutput(output));
            }

            //System.out.println("\nadd: " + input);
            if (input.Length == 0)
            {
                // empty input: only allowed as first input.  we have
                // to special case this because the packed FST
                // format cannot represent the empty input since
                // 'finalness' is stored on the incoming arc, not on
                // the node
                frontier[0].InputCount++;
                frontier[0].IsFinal = true;
                fst.EmptyOutput     = output;
                return;
            }

            // compare shared prefix length
            int pos1     = 0;
            int pos2     = input.Offset;
            int pos1Stop = Math.Min(lastInput.Length, input.Length);

            while (true)
            {
                frontier[pos1].InputCount++;
                //System.out.println("  incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
                if (pos1 >= pos1Stop || lastInput.Int32s[pos1] != input.Int32s[pos2])
                {
                    break;
                }
                pos1++;
                pos2++;
            }
            int prefixLenPlus1 = pos1 + 1;

            if (frontier.Length < input.Length + 1)
            {
                UnCompiledNode<T>[] next = new UnCompiledNode<T>[ArrayUtil.Oversize(input.Length + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                Array.Copy(frontier, 0, next, 0, frontier.Length);
                for (int idx = frontier.Length; idx < next.Length; idx++)
                {
                    next[idx] = new UnCompiledNode<T>(this, idx);
                }
                frontier = next;
            }

            // minimize/compile states from previous input's
            // orphan'd suffix
            DoFreezeTail(prefixLenPlus1);

            // init tail states for current input
            for (int idx = prefixLenPlus1; idx <= input.Length; idx++)
            {
                frontier[idx - 1].AddArc(input.Int32s[input.Offset + idx - 1], frontier[idx]);
                frontier[idx].InputCount++;
            }

            UnCompiledNode<T> lastNode = frontier[input.Length];

            if (lastInput.Length != input.Length || prefixLenPlus1 != input.Length + 1)
            {
                lastNode.IsFinal = true;
                lastNode.Output  = NO_OUTPUT;
            }

            // push conflicting outputs forward, only as far as
            // needed
            for (int idx = 1; idx < prefixLenPlus1; idx++)
            {
                UnCompiledNode<T> node       = frontier[idx];
                UnCompiledNode<T> parentNode = frontier[idx - 1];

                T lastOutput = parentNode.GetLastOutput(input.Int32s[input.Offset + idx - 1]);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(ValidOutput(lastOutput));
                }

                T commonOutputPrefix;
                T wordSuffix;

                if (!lastOutput.Equals(NO_OUTPUT))
                {
                    commonOutputPrefix = fst.Outputs.Common(output, lastOutput);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ValidOutput(commonOutputPrefix));
                    }
                    wordSuffix = fst.Outputs.Subtract(lastOutput, commonOutputPrefix);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ValidOutput(wordSuffix));
                    }
                    parentNode.SetLastOutput(input.Int32s[input.Offset + idx - 1], commonOutputPrefix);
                    node.PrependOutput(wordSuffix);
                }
                else
                {
                    commonOutputPrefix = /*wordSuffix =*/ NO_OUTPUT; // LUCENENET: Removed unnecessary assignment
                }

                output = fst.Outputs.Subtract(output, commonOutputPrefix);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(ValidOutput(output));
                }
            }

            if (lastInput.Length == input.Length && prefixLenPlus1 == 1 + input.Length)
            {
                // same input more than 1 time in a row, mapping to
                // multiple outputs
                lastNode.Output = fst.Outputs.Merge(lastNode.Output, output);
            }
            else
            {
                // this new arc is private to this new input; set its
                // arc output to the leftover output:
                frontier[prefixLenPlus1 - 1].SetLastOutput(input.Int32s[input.Offset + prefixLenPlus1 - 1], output);
            }

            // save last input
            lastInput.CopyInt32s(input);

            //System.out.println("  count[0]=" + frontier[0].inputCount);
        }
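
The pos1/pos2 loop near the top of Add computes the length of the prefix shared with the previous input: only the suffix past that point needs new states, and the previous input's tail beyond it can be frozen. A standalone sketch of just the prefix step:

using System;

static class PrefixSketch
{
    static int CommonPrefixLen(int[] a, int[] b)
    {
        int stop = Math.Min(a.Length, b.Length), i = 0;
        while (i < stop && a[i] == b[i]) i++; // same loop shape as pos1/pos2 above
        return i;
    }

    static void Main()
    {
        int[] last = { 's', 't', 'o', 'p' };
        int[] next = { 's', 't', 'o', 'r', 'e' };
        int prefixLen = CommonPrefixLen(last, next);
        Console.WriteLine(prefixLen); // 3 -> freeze the "p" tail, then add "re"
    }
}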
Example #26
            public virtual void _run()
            {
                for (int iter = 0; iter < NUM_TEST_ITER; iter++)
                {
                    FieldData field     = fields[Random.Next(fields.Length)];
                    TermsEnum termsEnum = termsDict.GetTerms(field.fieldInfo.Name).GetEnumerator();
#pragma warning disable 612, 618
                    if (si.Codec is Lucene3xCodec)
#pragma warning restore 612, 618
                    {
                        // code below expects unicode sort order
                        continue;
                    }

                    int upto = 0;
                    // Test straight enum of the terms:
                    while (termsEnum.MoveNext())
                    {
                        BytesRef term     = termsEnum.Term;
                        BytesRef expected = new BytesRef(field.terms[upto++].text2);
                        Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term);
                    }
                    Assert.AreEqual(upto, field.terms.Length);

                    // Test random seek:
                    TermData             term2  = field.terms[Random.Next(field.terms.Length)];
                    TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.text2));
                    Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                    Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
                    if (field.omitTF)
                    {
                        this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
                    }
                    else
                    {
                        this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
                    }

                    // Test random seek by ord:
                    int idx = Random.Next(field.terms.Length);
                    term2 = field.terms[idx];
                    bool success = false;
                    try
                    {
                        termsEnum.SeekExact(idx);
                        success = true;
                    }
                    catch (Exception uoe) when(uoe.IsUnsupportedOperationException())
                    {
                        // ok -- skip it
                    }
                    if (success)
                    {
                        Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                        Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(term2.text2)));
                        Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
                        if (field.omitTF)
                        {
                            this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
                        }
                        else
                        {
                            this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
                        }
                    }

                    // Test seek to non-existent terms:
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: seek non-exist terms");
                    }
                    for (int i = 0; i < 100; i++)
                    {
                        string text2 = TestUtil.RandomUnicodeString(Random) + ".";
                        status = termsEnum.SeekCeil(new BytesRef(text2));
                        Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
                    }

                    // Seek to each term, backwards:
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: seek terms backwards");
                    }
                    for (int i = field.terms.Length - 1; i >= 0; i--)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.terms[i].text2)), Thread.CurrentThread.Name + ": field=" + field.fieldInfo.Name + " term=" + field.terms[i].text2);
                        Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
                    }

                    // Seek to each term by ord, backwards
                    for (int i = field.terms.Length - 1; i >= 0; i--)
                    {
                        try
                        {
                            termsEnum.SeekExact(i);
                            Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
                            Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[i].text2)));
                        }
                        catch (Exception uoe) when (uoe.IsUnsupportedOperationException())
                        {
                        }
                    }

                    // Seek to non-existent empty-string term
                    status = termsEnum.SeekCeil(new BytesRef(""));
                    Assert.IsNotNull(status);
                    //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);

                    // Make sure we're now pointing to first term
                    Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[0].text2)));

                    // Test docs enum
                    termsEnum.SeekCeil(new BytesRef(""));
                    upto = 0;
                    do
                    {
                        term2 = field.terms[upto];
                        if (Random.Next(3) == 1)
                        {
                            DocsEnum             docs;
                            DocsEnum             docsAndFreqs;
                            DocsAndPositionsEnum postings;
                            if (!field.omitTF)
                            {
                                postings = termsEnum.DocsAndPositions(null, null);
                                if (postings != null)
                                {
                                    docs = docsAndFreqs = postings;
                                }
                                else
                                {
                                    docs = docsAndFreqs = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.FREQS);
                                }
                            }
                            else
                            {
                                postings     = null;
                                docsAndFreqs = null;
                                docs         = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE);
                            }
                            Assert.IsNotNull(docs);
                            int  upto2 = -1;
                            bool ended = false;
                            while (upto2 < term2.docs.Length - 1)
                            {
                                // Maybe skip:
                                int left = term2.docs.Length - upto2;
                                int doc;
                                if (Random.Next(3) == 1 && left >= 1)
                                {
                                    int inc = 1 + Random.Next(left - 1);
                                    upto2 += inc;
                                    if (Random.Next(2) == 1)
                                    {
                                        doc = docs.Advance(term2.docs[upto2]);
                                        Assert.AreEqual(term2.docs[upto2], doc);
                                    }
                                    else
                                    {
                                        doc = docs.Advance(1 + term2.docs[upto2]);
                                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                        {
                                            // skipped past last doc
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(upto2 == term2.docs.Length - 1);
                                            }
                                            ended = true;
                                            break;
                                        }
                                        else
                                        {
                                            // skipped to next doc
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(upto2 < term2.docs.Length - 1);
                                            }
                                            if (doc >= term2.docs[1 + upto2])
                                            {
                                                upto2++;
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    doc = docs.NextDoc();
                                    Assert.IsTrue(doc != -1);
                                    upto2++;
                                }
                                Assert.AreEqual(term2.docs[upto2], doc);
                                if (!field.omitTF)
                                {
                                    Assert.AreEqual(term2.positions[upto2].Length, postings.Freq);
                                    if (Random.Next(2) == 1)
                                    {
                                        this.VerifyPositions(term2.positions[upto2], postings);
                                    }
                                }
                            }

                            if (!ended)
                            {
                                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
                            }
                        }
                        upto++;
                    } while (termsEnum.MoveNext());

                    Assert.AreEqual(upto, field.terms.Length);
                }
            }
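The test above exercises every TermsEnum access mode: straight enumeration, SeekCeil by term bytes, SeekExact by ordinal, and stepping through postings with NextDoc/Advance. As a minimal sketch of the same enumeration API outside a test harness (the index path and the "contents"-style field name are placeholder assumptions, not part of the example above):

using System;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class TermsEnumDemo
{
    // indexPath and field are hypothetical; point them at a real index.
    public static void ListTerms(string indexPath, string field)
    {
        using Lucene.Net.Store.Directory dir = FSDirectory.Open(indexPath);
        using DirectoryReader reader = DirectoryReader.Open(dir);

        Terms terms = MultiFields.GetTerms(reader, field);
        if (terms is null)
        {
            return; // field has no postings
        }

        TermsEnum termsEnum = terms.GetEnumerator();

        // Straight enumeration, as in the test's first loop:
        while (termsEnum.MoveNext())
        {
            Console.WriteLine($"{termsEnum.Term.Utf8ToString()} docFreq={termsEnum.DocFreq}");
        }

        // SeekCeil lands on the target term, or on the first term after it:
        TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef("lucene"));
        if (status != TermsEnum.SeekStatus.END)
        {
            Console.WriteLine("landed on: " + termsEnum.Term.Utf8ToString());
        }
    }
}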
Example #27
            public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
            {
                long     valueCount = 0;
                BytesRef lastValue  = null;

                foreach (BytesRef b in values)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(b != null);
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(b.IsValid());
                    }
                    if (valueCount > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(b.CompareTo(lastValue) > 0);
                        }
                    }
                    lastValue = BytesRef.DeepCopyOf(b);
                    valueCount++;
                }

                int         docCount = 0;
                long        ordCount = 0;
                Int64BitSet seenOrds = new Int64BitSet(valueCount);

                using IEnumerator<long?> ordIterator = ords.GetEnumerator();
                foreach (long? v in docToOrdCount)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(v != null);
                    }
                    int count = (int)v.Value;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(count >= 0);
                    }
                    docCount++;
                    ordCount += count;

                    long lastOrd = -1;
                    for (int i = 0; i < count; i++)
                    {
                        ordIterator.MoveNext();
                        long? o = ordIterator.Current;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(o != null);
                        }
                        long ord = o.Value;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(ord >= 0 && ord < valueCount);
                        }
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(ord > lastOrd, "ord={0},lastOrd={1}", ord, lastOrd);
                        }
                        seenOrds.Set(ord);
                        lastOrd = ord;
                    }
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(ordIterator.MoveNext() == false);
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(docCount == maxDoc);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(seenOrds.Cardinality() == valueCount);
                }
                CheckIterator(values.GetEnumerator(), valueCount, false);
                CheckIterator(docToOrdCount.GetEnumerator(), maxDoc, false);
                CheckIterator(ords.GetEnumerator(), ordCount, false);
                @in.AddSortedSetField(field, values, docToOrdCount, ords);
            }
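AddSortedSetField receives three parallel streams: the deduplicated values in sorted order, one ordinal count per document, and the flattened ascending ordinals. The wrapper above only validates that contract before delegating to the inner consumer. A self-contained sketch of how per-document sets flatten into those streams (the sample data is hypothetical, and ordinal string sorting stands in for Lucene's unsigned-byte order):

using System;
using System.Collections.Generic;
using System.Linq;

public static class SortedSetEncodingDemo
{
    public static void Main()
    {
        // Hypothetical per-document value sets:
        string[][] docs =
        {
            new[] { "apple", "pear" },
            Array.Empty<string>(),
            new[] { "pear" },
        };

        // Stream 1: unique values in sorted order:
        string[] values = docs.SelectMany(d => d)
                              .Distinct()
                              .OrderBy(v => v, StringComparer.Ordinal)
                              .ToArray();
        Dictionary<string, long> ordOf = values
            .Select((v, i) => (v, i))
            .ToDictionary(p => p.v, p => (long)p.i);

        // Stream 2: one ord count per document; stream 3: that many ascending ords:
        List<long?> docToOrdCount = new List<long?>();
        List<long?> ords = new List<long?>();
        foreach (string[] doc in docs)
        {
            docToOrdCount.Add(doc.Length);
            foreach (string v in doc.OrderBy(x => x, StringComparer.Ordinal))
            {
                ords.Add(ordOf[v]);
            }
        }

        Console.WriteLine("values: " + string.Join(",", values));        // apple,pear
        Console.WriteLine("counts: " + string.Join(",", docToOrdCount)); // 2,0,1
        Console.WriteLine("ords:   " + string.Join(",", ords));          // 0,1,1
    }
}

On this input, every assertion in the wrapper holds: ords are ascending within each document, each ord falls in [0, valueCount), every ord is seen at least once, and docCount matches the number of documents.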
Example #28
        public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
        {
            ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
            Array.Sort(hits, Comparer<ScoreDoc>.Create((a, b) => a.Doc - b.Doc));

            IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;

            Weight weight = searcher.CreateNormalizedWeight(query);

            // Now merge sort docIDs from hits, with reader's leaves:
            int    hitUpto    = 0;
            int    readerUpto = -1;
            int    endDoc     = 0;
            int    docBase    = 0;
            Scorer scorer     = null;

            while (hitUpto < hits.Length)
            {
                ScoreDoc            hit           = hits[hitUpto];
                int                 docID         = hit.Doc;
                AtomicReaderContext readerContext = null;
                while (docID >= endDoc)
                {
                    readerUpto++;
                    readerContext = leaves[readerUpto];
                    endDoc        = readerContext.DocBase + readerContext.Reader.MaxDoc;
                }

                if (readerContext != null)
                {
                    // We advanced to another segment:
                    docBase = readerContext.DocBase;
                    scorer  = weight.GetScorer(readerContext, null);
                }

                int targetDoc = docID - docBase;
                int actualDoc = scorer.DocID;
                if (actualDoc < targetDoc)
                {
                    actualDoc = scorer.Advance(targetDoc);
                }

                if (actualDoc == targetDoc)
                {
                    // Query did match this doc:
                    hit.Score = Combine(hit.Score, true, scorer.GetScore());
                }
                else
                {
                    // Query did not match this doc:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(actualDoc > targetDoc);
                    }
                    hit.Score = Combine(hit.Score, false, 0.0f);
                }

                hitUpto++;
            }

            // TODO: we should do a partial sort (of only topN)
            // instead, but typically the number of hits is
            // smallish:
            Array.Sort(hits, Comparer<ScoreDoc>.Create((a, b) =>
            {
                // Sort by score descending, then docID ascending:
                if (a.Score > b.Score)
                {
                    return -1;
                }
                else if (a.Score < b.Score)
                {
                    return 1;
                }
                else
                {
                    // this subtraction can't overflow int
                    // because docIDs are >= 0:
                    return a.Doc - b.Doc;
                }
            }));

            if (topN < hits.Length)
            {
                ScoreDoc[] subset = new ScoreDoc[topN];
                Array.Copy(hits, 0, subset, 0, topN);
                hits = subset;
            }

            return new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score);
        }
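This Rescore implementation is the core of Lucene's QueryRescorer: it merge-walks the doc-sorted first-pass hits against the reader's leaves, keeps one scorer per segment, and lets Combine decide each hit's final score. A minimal sketch of hooking custom logic into it by subclassing (assuming the Lucene.NET 4.8 QueryRescorer API; the class name and weight value are illustrative):

using Lucene.Net.Search;

// A second-pass rescorer that adds a weighted bonus when the hit also
// matches the second-pass query:
public class WeightedSumRescorer : QueryRescorer
{
    private readonly float secondPassWeight;

    public WeightedSumRescorer(Query secondPassQuery, float secondPassWeight)
        : base(secondPassQuery)
    {
        this.secondPassWeight = secondPassWeight;
    }

    // Called once per first-pass hit, with the outcome of the second-pass match:
    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
    {
        return secondPassMatches
            ? firstPassScore + secondPassWeight * secondPassScore
            : firstPassScore;
    }
}

// Usage (searcher, firstPassTopDocs, and phraseQuery are assumed to exist):
// TopDocs rescored = new WeightedSumRescorer(phraseQuery, 2.0f).Rescore(searcher, firstPassTopDocs, 10);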
Example #29
        // Delete by Term
        private long ApplyTermDeletes(IEnumerable<Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                long   delCount = 0;
                Fields fields   = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return 0;
                }

                TermsEnum termsEnum = null;

                string   currentField = null;
                DocsEnum docs         = null;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CheckDeleteTerm(null));
                }

                bool any = false;

                //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
                foreach (Term term in termsIter)
                {
                    // Since we visit terms sorted, we gain performance
                    // by re-using the same TermsEnum and seeking only
                    // forwards
                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(currentField == null || currentField.CompareToOrdinal(term.Field) < 0);
                        }
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetEnumerator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(CheckDeleteTerm(term));
                    }

                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);
                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        if (docsEnum != null)
                        {
                            while (true)
                            {
                                int docID = docsEnum.NextDoc();
                                //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (!any)
                                {
                                    rld.InitWritableLiveDocs();
                                    any = true;
                                }
                                // NOTE: there is no limit check on the docID
                                // when deleting by Term (unlike by Query)
                                // because on flush we apply all Term deletes to
                                // each segment.  So all Term deleting here is
                                // against prior segments:
                                if (rld.Delete(docID))
                                {
                                    delCount++;
                                }
                            }
                        }
                    }
                }

                return delCount;
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
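ApplyTermDeletes is internal flush-time plumbing; applications reach it through IndexWriter's public delete API, which buffers Term deletes and later resolves them against each segment's postings exactly as the loop above does. A minimal usage sketch (the writer and the "id" field are assumptions for illustration):

using Lucene.Net.Index;

public static class DeleteByTermDemo
{
    public static void DeleteById(IndexWriter writer, string id)
    {
        // Buffers a Term delete; on flush it is applied to every segment,
        // marking matching docIDs in the writable live-docs bitset:
        writer.DeleteDocuments(new Term("id", id));
        writer.Commit();
    }
}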
Example #30
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Count;

            IList<SegmentReader> readers = new List<SegmentReader>();
            Directory            dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(info.Info.Dir == dir);
                    }
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return result;
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
#pragma warning disable 168
                        catch (Exception th)
#pragma warning restore 168
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
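This factory is what backs near-real-time search: it pulls per-segment readers straight from the writer's reader pool and skips fully deleted segments, so uncommitted changes become searchable without a commit. A minimal refresh-loop sketch over the public entry points, DirectoryReader.Open(writer, applyAllDeletes) and OpenIfChanged (writer is an assumed, already-configured IndexWriter):

using Lucene.Net.Index;

public static class NrtDemo
{
    // Open an NRT reader that sees the writer's uncommitted changes:
    public static DirectoryReader OpenNrt(IndexWriter writer)
    {
        return DirectoryReader.Open(writer, true); // true = apply all deletes
    }

    // Cheaply refresh: returns the same reader if nothing changed,
    // otherwise swaps in a new one and releases the old:
    public static DirectoryReader Refresh(IndexWriter writer, DirectoryReader current)
    {
        DirectoryReader newReader = DirectoryReader.OpenIfChanged(current, writer, true);
        if (newReader != null)
        {
            current.Dispose();
            return newReader;
        }
        return current;
    }
}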