public override void Dump()
{
    Debugging.CurrentPrice(Instrument);
    Debugging.Dump(Instrument, range: new Range(-100, 1), indicators: new[] { MA0, MA1 });
    Debugging.Dump(new SnR(Instrument));
}
Example #2
        //public static void main( string[] args ) throws Exception {
        //  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
        //  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
        //  Query query = parser.parse( "a x:b" );
        //  FieldQuery fieldQuery = new FieldQuery( query, true, false );

        //  Directory dir = new RAMDirectory();
        //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
        //  Document doc = new Document();
        //  IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED);
        //  ft.setStoreTermVectors(true);
        //  ft.setStoreTermVectorOffsets(true);
        //  ft.setStoreTermVectorPositions(true);
        //  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
        //  doc.add( new Field( "f", ft, "b a b a f" ) );
        //  writer.addDocument( doc );
        //  writer.close();

        //  IndexReader reader = IndexReader.open(dir);
        //  new FieldTermStack( reader, 0, "f", fieldQuery );
        //  reader.close();
        //}

        /// <summary>
        /// A constructor.
        /// </summary>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">document id to be highlighted</param>
        /// <param name="fieldName">field of the document to be highlighted</param>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
        {
            this.fieldName = fieldName;

            ISet<string> termSet = fieldQuery.GetTermSet(fieldName);

            // Just return (null snippet) if an unmatched fieldName was specified when fieldMatch == true
            if (termSet == null)
            {
                return;
            }

            Fields vectors = reader.GetTermVectors(docId);

            if (vectors == null)
            {
                // null snippet
                return;
            }

            Terms vector = vectors.GetTerms(fieldName);

            if (vector == null)
            {
                // null snippet
                return;
            }

            CharsRef             spare     = new CharsRef();
            TermsEnum            termsEnum = vector.GetEnumerator();
            DocsAndPositionsEnum dpEnum    = null;
            BytesRef             text;

            int numDocs = reader.MaxDoc;

            while (termsEnum.MoveNext())
            {
                text = termsEnum.Term;
                UnicodeUtil.UTF8toUTF16(text, spare);
                string term = spare.ToString();
                if (!termSet.Contains(term))
                {
                    continue;
                }
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                if (dpEnum == null)
                {
                    // null snippet
                    return;
                }

                dpEnum.NextDoc();

                // For the weight formula, see: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
                float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);
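                // Worked example of the weight formula above (plain arithmetic):
                // numDocs = 1000, docFreq = 9  =>  weight = ln(1000 / 10) + 1 = ln(100) + 1 ≈ 5.61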

                int freq = dpEnum.Freq;

                for (int i = 0; i < freq; i++)
                {
                    int pos = dpEnum.NextPosition();
                    if (dpEnum.StartOffset < 0)
                    {
                        return; // no offsets, null snippet
                    }
                    termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
                }
            }

            // sort by position
            CollectionUtil.TimSort(termList);

            // now look for dups at the same position, linking them together
            int      currentPos = -1;
            TermInfo previous   = null;
            TermInfo first      = null;

            for (int i = 0; i < termList.Count;)
            {
                TermInfo current = termList[i];
                if (current.Position == currentPos)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(previous != null);
                    }
                    previous.SetNext(current);
                    previous = current;
                    //iterator.Remove();

                    // LUCENENET NOTE: Remove, but don't advance the i position (since removing will advance to the next item)
                    termList.RemoveAt(i);
                }
                else
                {
                    if (previous != null)
                    {
                        previous.SetNext(first);
                    }
                    previous   = first = current;
                    currentPos = current.Position;

                    // LUCENENET NOTE: Only increment the position if we don't do a delete.
                    i++;
                }
            }

            if (previous != null)
            {
                previous.SetNext(first);
            }
        }
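
        // A hedged C# translation of the commented-out Java usage above, kept
        // commented out like the original. Names follow Lucene.NET 4.8 style
        // (LuceneVersion, DirectoryReader); note that a FieldQuery is normally
        // obtained through FastVectorHighlighter rather than constructed
        // directly, so treat this as an illustrative sketch only.
        //public static void UsageSketch()
        //{
        //    Analyzer analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
        //    Directory dir = new RAMDirectory();
        //    using (var writer = new IndexWriter(dir, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
        //    {
        //        var ft = new FieldType(TextField.TYPE_STORED)
        //        {
        //            StoreTermVectors = true,
        //            StoreTermVectorOffsets = true,
        //            StoreTermVectorPositions = true
        //        };
        //        var doc = new Document();
        //        doc.Add(new Field("f", "a a a b b c a b b c d e f", ft));
        //        writer.AddDocument(doc);
        //    }
        //    Query query = new QueryParser(LuceneVersion.LUCENE_48, "f", analyzer).Parse("a x:b");
        //    FieldQuery fieldQuery = new FieldQuery(query, true, false);
        //    using (IndexReader reader = DirectoryReader.Open(dir))
        //    {
        //        var stack = new FieldTermStack(reader, 0, "f", fieldQuery);
        //    }
        //}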
        internal override void AddTerm(int termID)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));
            }

            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(!hasFreq || postings.termFreqs[termID] > 0);
            }

            if (!hasFreq)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(postings.termFreqs == null);
                }
                if (docState.docID != postings.lastDocIDs[termID])
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docState.docID > postings.lastDocIDs[termID]);
                    }
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]);
                    postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
                    postings.lastDocIDs[termID]   = docState.docID;
                    fieldState.UniqueTermCount++;
                }
            }
            else if (docState.docID != postings.lastDocIDs[termID])
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(docState.docID > postings.lastDocIDs[termID], "id: {0} postings ID: {1} termID: {2}", docState.docID, postings.lastDocIDs[termID], termID);
                }
                // Term not yet seen in the current doc but previously
                // seen in other doc(s) since the last flush

                // Now that we know doc freq for previous doc,
                // write it & lastDocCode
                if (1 == postings.termFreqs[termID])
                {
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID] | 1);
                }
                else
                {
                    termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]);
                    termsHashPerField.WriteVInt32(0, postings.termFreqs[termID]);
                }
                postings.termFreqs[termID]    = 1;
                fieldState.MaxTermFrequency   = Math.Max(1, fieldState.MaxTermFrequency);
                postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
                postings.lastDocIDs[termID]   = docState.docID;
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position);
                    if (hasOffsets)
                    {
                        postings.lastOffsets[termID] = 0;
                        WriteOffsets(termID, fieldState.Offset);
                    }
                }
                else
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(!hasOffsets);
                    }
                }
                fieldState.UniqueTermCount++;
            }
            else
            {
                fieldState.MaxTermFrequency = Math.Max(fieldState.MaxTermFrequency, ++postings.termFreqs[termID]);
                if (hasProx)
                {
                    WriteProx(termID, fieldState.Position - postings.lastPositions[termID]);
                }
                if (hasOffsets)
                {
                    WriteOffsets(termID, fieldState.Offset);
                }
            }
        }
        private void BtnCheckSeed_Click(object sender, EventArgs e)
        {
            if (!chkShowObtainable.Checked && !chkShowUnobtainable.Checked)
            {
                LBResult.Items.Clear();
                return;
            }

            var logicCopy = Utility.CloneTrackerInstance(CheckerInstance);

            foreach (var entry in logicCopy.Logic)
            {
                entry.Available = false;
                entry.Checked   = false;
                entry.Aquired   = false;
                // Make the entry's randomized item its spoiler item, just for consistency's sake.
                if (entry.SpoilerRandom > -1)
                {
                    entry.RandomizedItem = entry.SpoilerRandom;
                }
                // If the entry has no spoiler data but does have a randomized item, set its spoiler data to the randomized item.
                else if (entry.RandomizedItem > -1)
                {
                    entry.SpoilerRandom = entry.RandomizedItem;
                }
                // If the entry has neither spoiler data nor a randomized item and is unrandomized (manual), set its spoiler item to itself.
                else if (entry.Unrandomized(2))
                {
                    entry.SpoilerRandom  = entry.ID;
                    entry.RandomizedItem = entry.ID;
                }
            }

            LBResult.Items.Clear();
            List<int> Ignored = new List<int>();

            foreach (var item in LBIgnoredChecks.Items)
            {
                Ignored.Add((item as LogicObjects.ListItem).PathID);
            }

            var GameClearEntry = logicCopy.Logic.Find(x => x.DictionaryName == "MMRTGameClear");

            if (GameClearEntry != null)
            {
                GameClearEntry.ItemName = (LogicObjects.MainTrackerInstance.IsMM()) ? "Defeat Majora" : "Beat the Game";
            }
            else if (LogicObjects.MainTrackerInstance.IsMM())
            {
                int GameClearID = PlaythroughGenerator.GetGameClearEntry(logicCopy.Logic, LogicObjects.MainTrackerInstance.IsEntranceRando());
                logicCopy.Logic[GameClearID].ItemName = "Defeat Majora";
            }

            CheckSeed(logicCopy, true, Ignored);
            List<string> obtainable   = new List<string>();
            List<string> unobtainable = new List<string>();

            foreach (var item in LBNeededItems.Items)
            {
                bool   Spoil           = false;
                var    ListItem        = item as LogicObjects.ListItem;
                var    iteminLogic     = logicCopy.Logic[ListItem.PathID];
                string ItemName        = iteminLogic.ItemName ?? iteminLogic.DictionaryName;
                var    ItemsLocation   = iteminLogic.GetItemsNewLocation(logicCopy.Logic);
                string LocationFoundAt = (ItemsLocation != null) ? ItemsLocation.LocationName ?? ItemsLocation.DictionaryName : "";
                string DisplayName     = (Spoil) ? ItemName + ": " + LocationFoundAt : ItemName;

                Debugging.Log(logicCopy.Logic[ListItem.PathID].DictionaryName + " " + logicCopy.Logic[ListItem.PathID].Aquired);
                if (logicCopy.Logic[ListItem.PathID].Aquired)
                {
                    obtainable.Add(DisplayName);
                }
                else
                {
                    unobtainable.Add(DisplayName);
                }
            }
            if (unobtainable.Count > 0 && chkShowUnobtainable.Checked)
            {
                LBResult.Items.Add("Unobtainable ==============================");
                foreach (var i in unobtainable)
                {
                    LBResult.Items.Add(i);
                }
            }
            if (obtainable.Count > 0 && chkShowObtainable.Checked)
            {
                LBResult.Items.Add("Obtainable ==============================");
                foreach (var i in obtainable)
                {
                    LBResult.Items.Add(i);
                }
            }
        }
Example #5
        /// <summary>
        /// Safe (but slowish) default method to write every
        /// vector field in the document.
        /// </summary>
        protected void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Count;

            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
                numFields = 0;
                //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();)
                foreach (string it in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum            termsEnum            = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;

            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName={0} fieldName={1}", lastFieldName, fieldName);
                }
                lastFieldName = fieldName;

                Terms terms = vectors.GetTerms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions;
                bool hasOffsets   = terms.HasOffsets;
                bool hasPayloads  = terms.HasPayloads;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!hasPayloads || hasPositions);
                }

                int numTerms = (int)terms.Count;
                if (numTerms == -1)
                {
                    // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
                    numTerms  = 0;
                    termsEnum = terms.GetEnumerator(termsEnum);
                    while (termsEnum.MoveNext())
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.GetEnumerator(termsEnum);

                int termCount = 0;
                while (termsEnum.MoveNext())
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq;

                    StartTerm(termsEnum.Term, freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docsAndPositionsEnum != null);
                        }

                        int docID = docsAndPositionsEnum.NextDoc();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                            Debugging.Assert(docsAndPositionsEnum.Freq == freq);
                        }

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos         = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset;
                            int endOffset   = docsAndPositionsEnum.EndOffset;

                            BytesRef payload = docsAndPositionsEnum.GetPayload();

                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(!hasPositions || pos >= 0);
                            }
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termCount == numTerms);
                }
                FinishField();
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fieldCount == numFields);
            }
            FinishDocument();
        }
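
        // A hedged sketch (not shown in this file) of how a TermVectorsWriter
        // subclass' Merge override typically drives AddAllDocVectors: one
        // StartDocument/FinishDocument cycle per live document. Member names
        // follow the Lucene.NET 4.8 API; treat the details as assumptions.
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            foreach (AtomicReader reader in mergeState.Readers)
            {
                IBits liveDocs = reader.LiveDocs;
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                {
                    if (liveDocs != null && !liveDocs.Get(docID))
                    {
                        continue; // skip deleted docs
                    }
                    AddAllDocVectors(reader.GetTermVectors(docID), mergeState);
                    docCount++;
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }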
        public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
        {
            string segment        = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int    docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int    size           = si.DocCount;

            bool success = false;

            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    storeCFSReader = null;
                }
                string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
                tvx    = d.OpenInput(idxName, context);
                format = CheckValidFormat(tvx);
                string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
                tvd = d.OpenInput(fn, context);
                int tvdFormat = CheckValidFormat(tvd);
                fn  = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
                tvf = d.OpenInput(fn, context);
                int tvfFormat = CheckValidFormat(tvf);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(format == tvdFormat);
                    Debugging.Assert(format == tvfFormat);
                }

                numTotalDocs = (int)(tvx.Length >> 4);

                if (-1 == docStoreOffset)
                {
                    this.docStoreOffset = 0;
                    this.size           = numTotalDocs;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(size == 0 || numTotalDocs == size);
                    }
                }
                else
                {
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;
                    // Verify the file is long enough to hold all of our
                    // docs
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs={0} size={1} docStoreOffset={2}", numTotalDocs, size, docStoreOffset);
                    }
                }

                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    }
                    catch (Exception t) when (t.IsThrowable())
                    {
                        // keep our original exception
                    }
                }
            }
        }
Example #7
            internal Cell? nextCell; // see GetLeafDocs

            /// <remarks>This is the primary algorithm; it is recursive. Returns null if it finds none.</remarks>
            /// <exception cref="IOException"></exception>
            internal SmallDocSet? Visit(Cell cell, IBits acceptContains)
            {
                if (m_termsEnum is null)
                {
                    // signals all done
                    return null;
                }

                ContainsPrefixTreeFilter outerInstance = (ContainsPrefixTreeFilter)base.m_filter;

                // Leaf docs match all of the query shape
                SmallDocSet? leafDocs = GetLeafDocs(cell, acceptContains);
                // Get the AND of all child results (into combinedSubResults)
                SmallDocSet? combinedSubResults = null;
                // Optimization: use a null subCellsFilter when we know the cell is within the query shape.
                IShape? subCellsFilter = outerInstance.m_queryShape;

                if (cell.Level != 0 && (cell.ShapeRel == SpatialRelation.None || cell.ShapeRel == SpatialRelation.Within))
                {
                    subCellsFilter = null;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(cell.Shape.Relate(outerInstance.m_queryShape) == SpatialRelation.Within);
                    }
                }
                ICollection<Cell> subCells = cell.GetSubCells(subCellsFilter);

                foreach (Cell subCell in subCells)
                {
                    if (!SeekExact(subCell))
                    {
                        combinedSubResults = null;
                    }
                    else if (subCell.Level == outerInstance.m_detailLevel)
                    {
                        combinedSubResults = GetDocs(subCell, acceptContains);
                    }
                    else if (!outerInstance.m_multiOverlappingIndexedShapes &&
                             subCell.ShapeRel == SpatialRelation.Within)
                    {
                        combinedSubResults = GetLeafDocs(subCell, acceptContains); //recursion
                    }
                    else
                    {
                        combinedSubResults = Visit(subCell, acceptContains);
                    }

                    if (combinedSubResults is null)
                    {
                        break;
                    }

                    acceptContains = combinedSubResults; // has the 'AND' effect on the next iteration
                }

                // Result: OR the leaf docs with AND of all child results
                if (combinedSubResults != null)
                {
                    if (leafDocs is null)
                    {
                        return combinedSubResults;
                    }
                    return leafDocs.Union(combinedSubResults); // union is 'or'
                }
                return leafDocs;
            }
Example #8
        public override SeekStatus SeekCeil(BytesRef term)
        {
            queue.Clear();
            numTop = 0;
            lastSeekExact = false;

            bool seekOpt = false;
            if (lastSeek != null && termComp.Compare(lastSeek, term) <= 0)
            {
                seekOpt = true;
            }

            lastSeekScratch.CopyBytes(term);
            lastSeek = lastSeekScratch;

            for (int i = 0; i < numSubs; i++)
            {
                SeekStatus status;
                // LUCENE-2130: if we had just seek'd already, prior
                // to this seek, and the new seek term is after the
                // previous one, don't try to re-seek this sub if its
                // current term is already beyond this new seek term.
                // Doing so is a waste because this sub will simply
                // seek to the same spot.
                if (seekOpt)
                {
                    BytesRef curTerm = currentSubs[i].Current;
                    if (curTerm != null)
                    {
                        int cmp = termComp.Compare(term, curTerm);
                        if (cmp == 0)
                        {
                            status = SeekStatus.FOUND;
                        }
                        else if (cmp < 0)
                        {
                            status = SeekStatus.NOT_FOUND;
                        }
                        else
                        {
                            status = currentSubs[i].Terms.SeekCeil(term);
                        }
                    }
                    else
                    {
                        status = SeekStatus.END;
                    }
                }
                else
                {
                    status = currentSubs[i].Terms.SeekCeil(term);
                }

                if (status == SeekStatus.FOUND)
                {
                    top[numTop++] = currentSubs[i];
                    current = currentSubs[i].Current = currentSubs[i].Terms.Term;
                }
                else
                {
                    if (status == SeekStatus.NOT_FOUND)
                    {
                        currentSubs[i].Current = currentSubs[i].Terms.Term;
                        if (Debugging.AssertsEnabled) Debugging.Assert(currentSubs[i].Current != null);
                        queue.Add(currentSubs[i]);
                    }
                    else
                    {
                        // enum exhausted
                        currentSubs[i].Current = null;
                    }
                }
            }

            if (numTop > 0)
            {
                // at least one sub had exact match to the requested term
                return SeekStatus.FOUND;
            }
            else if (queue.Count > 0)
            {
                // no sub had exact match, but at least one sub found
                // a term after the requested term -- advance to that
                // next term:
                PullTop();
                return SeekStatus.NOT_FOUND;
            }
            else
            {
                return SeekStatus.END;
            }
        }
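
        // Contract illustration for SeekCeil, assuming a hypothetical merged enum
        // over the sorted terms { "apple", "banana" }:
        //   SeekCeil("apple") -> SeekStatus.FOUND      (positioned on "apple")
        //   SeekCeil("az")    -> SeekStatus.NOT_FOUND  (positioned on "banana")
        //   SeekCeil("zebra") -> SeekStatus.END        (enum exhausted)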
Example #9
        public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
        {
            // Can only reuse if incoming enum is also a MultiDocsEnum
            // ... and was previously created w/ this MultiTermsEnum:
            if (reuse is null || !(reuse is MultiDocsEnum docsEnum) || !docsEnum.CanReuse(this))
                docsEnum = new MultiDocsEnum(this, subs.Length);

            int upto = 0;

            for (int i = 0; i < numTop; i++)
            {
                TermsEnumWithSlice entry = top[i];

                IBits b;

                if (liveDocs is MultiBits multiLiveDocs)
                {
                    // optimize for common case: requested skip docs is a
                    // congruent sub-slice of MultiBits: in this case, we
                    // just pull the liveDocs from the sub reader, rather
                    // than making the inefficient
                    // Slice(Multi(sub-readers)):
                    MultiBits.SubResult sub = multiLiveDocs.GetMatchingSub(entry.SubSlice);
                    if (sub.Matches)
                    {
                        b = sub.Result;
                    }
                    else
                    {
                        // custom case: requested skip docs is foreign:
                        // must slice it on every access
                        b = new BitsSlice(liveDocs, entry.SubSlice);
                    }
                }
                else if (liveDocs != null)
                {
                    b = new BitsSlice(liveDocs, entry.SubSlice);
                }
                else
                {
                    // no deletions
                    b = null;
                }

                if (Debugging.AssertsEnabled) Debugging.Assert(entry.Index < docsEnum.subDocsEnum.Length, "{0} vs {1}; {2}", entry.Index, docsEnum.subDocsEnum.Length, subs.Length);
                DocsEnum subDocsEnum = entry.Terms.Docs(b, docsEnum.subDocsEnum[entry.Index], flags);
                if (subDocsEnum != null)
                {
                    docsEnum.subDocsEnum[entry.Index] = subDocsEnum;
                    subDocs[upto].DocsEnum = subDocsEnum;
                    subDocs[upto].Slice = entry.SubSlice;
                    upto++;
                }
                else
                {
                    // should this be an error?
                    if (Debugging.AssertsEnabled) Debugging.Assert(false, "One of our subs cannot provide a docsenum");
                }
            }

            if (upto == 0)
            {
                return null;
            }
            else
            {
                return docsEnum.Reset(subDocs, upto);
            }
        }
Example #10
        internal long currentEntryIndex; // also indicates how many entries in the index are valid.

        /// <summary>
        /// Construct an Elias-Fano encoder.
        /// After construction, call <see cref="EncodeNext(long)"/> <paramref name="numValues"/> times to encode
        /// a non-decreasing sequence of non-negative numbers.
        /// </summary>
        /// <param name="numValues"> The number of values that is to be encoded. </param>
        /// <param name="upperBound">  At least the highest value that will be encoded.
        ///                For space efficiency this should not exceed the power of two that equals
        ///                or is the first higher than the actual maximum.
        ///                <para/>When <c>numValues >= (upperBound/3)</c>
        ///                a <see cref="FixedBitSet"/> will take less space. </param>
        /// <param name="indexInterval"> The number of high zero bits for which a single index entry is built.
        ///                The index will have at most <c>2 * numValues / indexInterval</c> entries
        ///                and each index entry will use at most <c>Ceil(Log2(3 * numValues))</c> bits,
        ///                see <see cref="EliasFanoEncoder"/>. </param>
        /// <exception cref="ArgumentException"> when:
        ///         <list type="bullet">
        ///         <item><description><paramref name="numValues"/> is negative, or</description></item>
        ///         <item><description><paramref name="numValues"/> is non negative and <paramref name="upperBound"/> is negative, or</description></item>
        ///         <item><description>the low bits do not fit in a <c>long[]</c>:
        ///             <c>(L * numValues / 64) > System.Int32.MaxValue</c>, or</description></item>
        ///         <item><description>the high bits do not fit in a <c>long[]</c>:
        ///             <c>(2 * numValues / 64) > System.Int32.MaxValue</c>, or</description></item>
        ///         <item><description><c>indexInterval &lt; 2</c>,</description></item>
        ///         <item><description>the index bits do not fit in a <c>long[]</c>:
        ///             <c>(numValues / indexInterval * ceil(2log(3 * numValues)) / 64) > System.Int32.MaxValue</c>.</description></item>
        ///         </list> </exception>
        public EliasFanoEncoder(long numValues, long upperBound, long indexInterval)
        {
            if (numValues < 0L)
            {
                throw new ArgumentException("numValues should not be negative: " + numValues);
            }
            this.numValues = numValues;
            if ((numValues > 0L) && (upperBound < 0L))
            {
                throw new ArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0");
            }
            this.upperBound = numValues > 0 ? upperBound : -1L; // if there is no value, -1 is the best upper bound
            int nLowBits = 0;

            if (this.numValues > 0) // nLowBits = max(0, floor(log2(upperBound/numValues)))
            {
                long lowBitsFac = this.upperBound / this.numValues;
                if (lowBitsFac > 0)
                {
                    nLowBits = 63 - lowBitsFac.LeadingZeroCount(); // see Long.numberOfLeadingZeros javadocs
                }
            }
            this.numLowBits    = nLowBits;
            this.lowerBitsMask = (long)(unchecked((ulong)long.MaxValue) >> (sizeof(long) * 8 - 1 - this.numLowBits));

            long numLongsForLowBits = NumInt64sForBits(numValues * numLowBits);

            if (numLongsForLowBits > int.MaxValue)
            {
                throw new ArgumentException("numLongsForLowBits too large to index a long array: " + numLongsForLowBits);
            }
            this.lowerLongs = new long[(int)numLongsForLowBits];

            long numHighBitsClear = (long)((ulong)((this.upperBound > 0) ? this.upperBound : 0) >> this.numLowBits);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numHighBitsClear <= (2 * this.numValues));
            }
            long numHighBitsSet = this.numValues;

            long numLongsForHighBits = NumInt64sForBits(numHighBitsClear + numHighBitsSet);

            if (numLongsForHighBits > int.MaxValue)
            {
                throw new ArgumentException("numLongsForHighBits too large to index a long array: " + numLongsForHighBits);
            }
            this.upperLongs = new long[(int)numLongsForHighBits];
            if (indexInterval < 2)
            {
                throw new ArgumentException("indexInterval should at least 2: " + indexInterval);
            }
            // For the index:
            long maxHighValue  = (long)((ulong)upperBound >> this.numLowBits);
            long nIndexEntries = maxHighValue / indexInterval; // no zero value index entry

            this.numIndexEntries = (nIndexEntries >= 0) ? nIndexEntries : 0;
            long maxIndexEntry = maxHighValue + numValues - 1; // clear upper bits, set upper bits, start at zero

            this.nIndexEntryBits = (maxIndexEntry <= 0) ? 0 : (64 - maxIndexEntry.LeadingZeroCount());
            long numLongsForIndexBits = NumInt64sForBits(numIndexEntries * nIndexEntryBits);

            if (numLongsForIndexBits > int.MaxValue)
            {
                throw new ArgumentException("numLongsForIndexBits too large to index a long array: " + numLongsForIndexBits);
            }
            this.upperZeroBitPositionIndex = new long[(int)numLongsForIndexBits];
            this.currentEntryIndex         = 0;
            this.indexInterval             = indexInterval;
        }
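
        // A minimal usage sketch with hypothetical values, assuming only the
        // constructor above and the EncodeNext(long) method its doc comment
        // references: five non-decreasing, non-negative values bounded by 1000.
        //var encoder = new EliasFanoEncoder(numValues: 5, upperBound: 1000, indexInterval: 256);
        //foreach (long value in new long[] { 3, 17, 17, 250, 999 })
        //{
        //    encoder.EncodeNext(value); // must be called exactly numValues times
        //}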
Example #11
        public override bool SeekExact(BytesRef term)
        {
            queue.Clear();
            numTop = 0;

            bool seekOpt = false;
            if (lastSeek != null && termComp.Compare(lastSeek, term) <= 0)
            {
                seekOpt = true;
            }

            lastSeek = null;
            lastSeekExact = true;

            for (int i = 0; i < numSubs; i++)
            {
                bool status;
                // LUCENE-2130: if we had just seek'd already, prior
                // to this seek, and the new seek term is after the
                // previous one, don't try to re-seek this sub if its
                // current term is already beyond this new seek term.
                // Doing so is a waste because this sub will simply
                // seek to the same spot.
                if (seekOpt)
                {
                    BytesRef curTerm = currentSubs[i].Current;
                    if (curTerm != null)
                    {
                        int cmp = termComp.Compare(term, curTerm);
                        if (cmp == 0)
                        {
                            status = true;
                        }
                        else if (cmp < 0)
                        {
                            status = false;
                        }
                        else
                        {
                            status = currentSubs[i].Terms.SeekExact(term);
                        }
                    }
                    else
                    {
                        status = false;
                    }
                }
                else
                {
                    status = currentSubs[i].Terms.SeekExact(term);
                }

                if (status)
                {
                    top[numTop++] = currentSubs[i];
                    current = currentSubs[i].Current = currentSubs[i].Terms.Term;
                    if (Debugging.AssertsEnabled) Debugging.Assert(term.Equals(currentSubs[i].Current));
                }
            }

            // if at least one sub had exact match to the requested
            // term then we found match
            return numTop > 0;
        }
        internal void FinishDocument()
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));
            }

            int numPostings = termsHashPerField.bytesHash.Count;

            BytesRef flushTerm = termsWriter.flushTerm;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numPostings >= 0);
            }

            if (numPostings > maxNumPostings)
            {
                maxNumPostings = numPostings;
            }

            // this is called once, after inverting all occurrences
            // of a given field in the doc.  At this point we flush
            // our hash into the DocWriter.

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsWriter.VectorFieldsInOrder(fieldInfo));
            }

            TermVectorsPostingsArray postings = (TermVectorsPostingsArray)termsHashPerField.postingsArray;
            TermVectorsWriter        tv       = termsWriter.writer;

            int[] termIDs = termsHashPerField.SortPostings(tv.Comparer);

            tv.StartField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);

            ByteSliceReader posReader = doVectorPositions ? termsWriter.vectorSliceReaderPos : null;
            ByteSliceReader offReader = doVectorOffsets ? termsWriter.vectorSliceReaderOff : null;

            ByteBlockPool termBytePool = termsHashPerField.termBytePool;

            for (int j = 0; j < numPostings; j++)
            {
                int termID = termIDs[j];
                int freq   = postings.freqs[termID];

                // Get BytesRef
                termBytePool.SetBytesRef(flushTerm, postings.textStarts[termID]);
                tv.StartTerm(flushTerm, freq);

                if (doVectorPositions || doVectorOffsets)
                {
                    if (posReader != null)
                    {
                        termsHashPerField.InitReader(posReader, termID, 0);
                    }
                    if (offReader != null)
                    {
                        termsHashPerField.InitReader(offReader, termID, 1);
                    }
                    tv.AddProx(freq, posReader, offReader);
                }
                tv.FinishTerm();
            }
            tv.FinishField();

            termsHashPerField.Reset();

            fieldInfo.SetStoreTermVectors();
        }
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                // Load the index into memory
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                }
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
                {
                    indexStream.ReadVInt64(); // the end of the data file
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
                vectorsStream = d.OpenInput(vectorsStreamFN, context);
                string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
                int    version2     = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (version != version2)
                {
                    throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer());
                }

                packedIntsVersion = vectorsStream.ReadVInt32();
                chunkSize         = vectorsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.reader       = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
        public override Fields Get(int doc)
        {
            EnsureOpen();

            // seek to the right place
            {
                long startPointer = indexReader.GetStartPointer(doc);
                vectorsStream.Seek(startPointer);
            }

            // decode
            // - docBase: first doc ID of the chunk
            // - chunkDocs: number of docs of the chunk
            int docBase   = vectorsStream.ReadVInt32();
            int chunkDocs = vectorsStream.ReadVInt32();

            if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
            }

            int skip;        // number of fields to skip
            int numFields;   // number of fields of the document we're looking for
            int totalFields; // total number of fields of the chunk (sum for all docs)

            if (chunkDocs == 1)
            {
                skip      = 0;
                numFields = totalFields = vectorsStream.ReadVInt32();
            }
            else
            {
                reader.Reset(vectorsStream, chunkDocs);
                int sum = 0;
                for (int i = docBase; i < doc; ++i)
                {
                    sum += (int)reader.Next();
                }
                skip      = sum;
                numFields = (int)reader.Next();
                sum      += numFields;
                for (int i = doc + 1; i < docBase + chunkDocs; ++i)
                {
                    sum += (int)reader.Next();
                }
                totalFields = sum;
            }
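            // Worked example: docBase = 100, chunkDocs = 3, doc = 101 and per-doc
            // field counts { 2, 4, 1 } give skip = 2, numFields = 4, totalFields = 7.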

            if (numFields == 0)
            {
                // no vectors
                return null;
            }

            // read field numbers that have term vectors
            int[] fieldNums;
            {
                int token = vectorsStream.ReadByte() & 0xFF;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(token != 0); // token == 0 would mean no term vectors, which cannot happen since we returned above when numFields == 0
                }
                int bitsPerFieldNum     = token & 0x1F;
                int totalDistinctFields = (int)((uint)token >> 5);
                if (totalDistinctFields == 0x07)
                {
                    totalDistinctFields += vectorsStream.ReadVInt32();
                }
                ++totalDistinctFields;
                PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
                fieldNums = new int[totalDistinctFields];
                for (int i = 0; i < totalDistinctFields; ++i)
                {
                    fieldNums[i] = (int)it.Next();
                }
            }

            // read field numbers and flags
            int[] fieldNumOffs = new int[numFields];
            PackedInt32s.Reader flags;
            {
                int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
                PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
                switch (vectorsStream.ReadVInt32())
                {
                case 0:
                    PackedInt32s.Reader  fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                    PackedInt32s.Mutable f          = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                    for (int i = 0; i < totalFields; ++i)
                    {
                        int fieldNumOff = (int)allFieldNumOffs.Get(i);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                        }
                        int fgs = (int)fieldFlags.Get(fieldNumOff);
                        f.Set(i, fgs);
                    }
                    flags = f;
                    break;

                case 1:
                    flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                    break;

                default:
                    throw new Exception();
                }
                for (int i = 0; i < numFields; ++i)
                {
                    fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
                }
            }

            // number of terms per field for all fields
            PackedInt32s.Reader numTerms;
            int totalTerms;
            {
                int bitsRequired = vectorsStream.ReadVInt32();
                numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
                int sum = 0;
                for (int i = 0; i < totalFields; ++i)
                {
                    sum += (int)numTerms.Get(i);
                }
                totalTerms = sum;
            }

            // term lengths
            int docOff = 0, docLen = 0, totalLen;

            int[]   fieldLengths  = new int[numFields];
            int[][] prefixLengths = new int[numFields][];
            int[][] suffixLengths = new int[numFields][];
            {
                reader.Reset(vectorsStream, totalTerms);
                // skip
                int toSkip = 0;
                for (int i = 0; i < skip; ++i)
                {
                    toSkip += (int)numTerms.Get(i);
                }
                reader.Skip(toSkip);
                // read prefix lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldPrefixLengths = new int[termCount];
                    prefixLengths[i] = fieldPrefixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                }
                reader.Skip(totalTerms - reader.Ord);

                reader.Reset(vectorsStream, totalTerms);
                // skip
                toSkip = 0;
                for (int i = 0; i < skip; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        docOff += (int)reader.Next();
                    }
                }
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldSuffixLengths = new int[termCount];
                    suffixLengths[i] = fieldSuffixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                    fieldLengths[i] = Sum(suffixLengths[i]);
                    docLen         += fieldLengths[i];
                }
                totalLen = docOff + docLen;
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        totalLen += (int)reader.Next();
                    }
                }
            }

            // term freqs
            int[] termFreqs = new int[totalTerms];
            {
                reader.Reset(vectorsStream, totalTerms);
                for (int i = 0; i < totalTerms;)
                {
                    Int64sRef next = reader.Next(totalTerms - i);
                    for (int k = 0; k < next.Length; ++k)
                    {
                        termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
                    }
                }
            }

            // total number of positions, offsets and payloads
            int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;

            for (int i = 0, termIndex = 0; i < totalFields; ++i)
            {
                int f         = (int)flags.Get(i);
                int termCount = (int)numTerms.Get(i);
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex++];
                    if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
                    {
                        totalPositions += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
                    {
                        totalOffsets += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        totalPayloads += freq;
                    }
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(i != totalFields - 1 || termIndex == totalTerms, () => termIndex + " " + totalTerms);
                }
            }

            int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
            int[][] positions, startOffsets, lengths;
            if (totalPositions > 0)
            {
                positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
            }
            else
            {
                positions = new int[numFields][];
            }

            if (totalOffsets > 0)
            {
                // average number of chars per term
                float[] charsPerTerm = new float[fieldNums.Length];
                for (int i = 0; i < charsPerTerm.Length; ++i)
                {
                    charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
                }
                startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
                lengths      = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

                for (int i = 0; i < numFields; ++i)
                {
                    int[] fStartOffsets = startOffsets[i];
                    int[] fPositions    = positions[i];
                    // patch offsets from positions
                    if (fStartOffsets != null && fPositions != null)
                    {
                        float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                        for (int j = 0; j < startOffsets[i].Length; ++j)
                        {
                            fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                        }
                    }
                    if (fStartOffsets != null)
                    {
                        int[] fPrefixLengths = prefixLengths[i];
                        int[] fSuffixLengths = suffixLengths[i];
                        int[] fLengths       = lengths[i];
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode start offsets and patch lengths using term lengths
                            int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                            lengths[i][positionIndex[i][j]] += termLength;
                            for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                            {
                                fStartOffsets[k] += fStartOffsets[k - 1];
                                fLengths[k]      += termLength;
                            }
                        }
                    }
                }
            }
            else
            {
                startOffsets = lengths = new int[numFields][];
            }
            if (totalPositions > 0)
            {
                // delta-decode positions
                for (int i = 0; i < numFields; ++i)
                {
                    int[] fPositions     = positions[i];
                    int[] fpositionIndex = positionIndex[i];
                    if (fPositions != null)
                    {
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode positions
                            for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                            {
                                fPositions[k] += fPositions[k - 1];
                            }
                        }
                    }
                }
            }

            // payload lengths
            int[][] payloadIndex       = new int[numFields][];
            int     totalPayloadLength = 0;
            int     payloadOff         = 0;
            int     payloadLen         = 0;

            if (totalPayloads > 0)
            {
                reader.Reset(vectorsStream, totalPayloads);
                // skip
                int termIndex = 0;
                for (int i = 0; i < skip; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int l = (int)reader.Next();
                                payloadOff += l;
                            }
                        }
                    }
                    termIndex += termCount;
                }
                totalPayloadLength = payloadOff;
                // read doc payload lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int f         = (int)flags.Get(skip + i);
                    int termCount = (int)numTerms.Get(skip + i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        int totalFreq = positionIndex[i][termCount];
                        payloadIndex[i] = new int[totalFreq + 1];
                        int posIdx = 0;
                        payloadIndex[i][posIdx] = payloadLen;
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int payloadLength = (int)reader.Next();
                                payloadLen += payloadLength;
                                payloadIndex[i][posIdx + 1] = payloadLen;
                                ++posIdx;
                            }
                        }
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(posIdx == totalFreq);
                        }
                    }
                    termIndex += termCount;
                }
                totalPayloadLength += payloadLen;
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                totalPayloadLength += (int)reader.Next();
                            }
                        }
                    }
                    termIndex += termCount;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termIndex == totalTerms, () => termIndex + " " + totalTerms);
                }
            }

            // decompress data
            BytesRef suffixBytes = new BytesRef();

            decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
            suffixBytes.Length = docLen;
            BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

            int[] fieldFlags = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                fieldFlags[i] = (int)flags.Get(skip + i);
            }

            int[] fieldNumTerms = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                fieldNumTerms[i] = (int)numTerms.Get(skip + i);
            }

            int[][] fieldTermFreqs = new int[numFields][];
            {
                int termIdx = 0;
                for (int i = 0; i < skip; ++i)
                {
                    termIdx += (int)numTerms.Get(i);
                }
                for (int i = 0; i < numFields; ++i)
                {
                    int termCount = (int)numTerms.Get(skip + i);
                    fieldTermFreqs[i] = new int[termCount];
                    for (int j = 0; j < termCount; ++j)
                    {
                        fieldTermFreqs[i][j] = termFreqs[termIdx++];
                    }
                }
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(Sum(fieldLengths) == docLen, () => Sum(fieldLengths) + " != " + docLen);
            }

            return(new TVFields(this, fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes));
        }
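
A minimal standalone sketch of the offset-patching idea used above (names and numbers are illustrative, not the Lucene.NET API): the writer stores startOffset minus (int)(avgCharsPerTerm * position) as a small residual, and the reader adds the approximation back, exactly as the fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]) loop does.

using System;

public static class OffsetPatchDemo
{
    public static void Main()
    {
        float charsPerTerm = 6.5f;          // average chars per term for the field
        int[] positions = { 0, 3, 7 };      // term positions within the document
        int[] residuals = { 0, 2, -1 };     // small deltas as stored on disk

        for (int j = 0; j < positions.Length; j++)
        {
            // same formula as the patch loop above
            int startOffset = residuals[j] + (int)(charsPerTerm * positions[j]);
            Console.WriteLine($"pos={positions[j]} -> startOffset={startOffset}");
        }
    }
}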
Beispiel #15
0
        internal readonly bool isSegmentPrivate; // set to true iff this frozen packet represents
        // segment-private deletes; in that case it should
        // only have queries

        public FrozenBufferedUpdates(BufferedUpdates deletes, bool isSegmentPrivate)
        {
            this.isSegmentPrivate = isSegmentPrivate;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(!isSegmentPrivate || deletes.terms.Count == 0, () => "segment private package should only have del queries");
            }
            Term[] termsArray = deletes.terms.Keys.ToArray(/*new Term[deletes.terms.Count]*/);

            termCount = termsArray.Length;
            ArrayUtil.TimSort(termsArray);
            PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
            foreach (Term term in termsArray)
            {
                builder.Add(term);
            }
            terms = builder.Finish();

            queries     = new Query[deletes.queries.Count];
            queryLimits = new int[deletes.queries.Count];
            int upto = 0;

            foreach (KeyValuePair <Query, int?> ent in deletes.queries)
            {
                queries[upto] = ent.Key;
                if (ent.Value.HasValue)
                {
                    queryLimits[upto] = ent.Value.Value;
                }
                else
                {
                    // LUCENENET NOTE: According to this: http://stackoverflow.com/a/13914344
                    // we are supposed to throw an exception in this case, rather than
                    // silently fail.
                    throw new NullReferenceException();
                }
                upto++;
            }

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <NumericDocValuesUpdate> allNumericUpdates = new List <NumericDocValuesUpdate>();
            int numericUpdatesSize = 0;

            foreach (var numericUpdates in deletes.numericUpdates.Values)
            {
                foreach (NumericDocValuesUpdate update in numericUpdates.Values)
                {
                    allNumericUpdates.Add(update);
                    numericUpdatesSize += update.GetSizeInBytes();
                }
            }
            numericDVUpdates = allNumericUpdates.ToArray();

            // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
            // so that it maps to all fields it affects, sorted by their docUpto, and traverse
            // that Term only once, applying the update to all fields that still need to be
            // updated.
            IList <BinaryDocValuesUpdate> allBinaryUpdates = new List <BinaryDocValuesUpdate>();
            int binaryUpdatesSize = 0;

            foreach (var binaryUpdates in deletes.binaryUpdates.Values)
            {
                foreach (BinaryDocValuesUpdate update in binaryUpdates.Values)
                {
                    allBinaryUpdates.Add(update);
                    binaryUpdatesSize += update.GetSizeInBytes();
                }
            }
            binaryDVUpdates = allBinaryUpdates.ToArray();

            bytesUsed = (int)terms.GetSizeInBytes() + queries.Length * BYTES_PER_DEL_QUERY + numericUpdatesSize + numericDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF + binaryUpdatesSize + binaryDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;

            numTermDeletes = deletes.numTermDeletes;
        }
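
PrefixCodedTerms.Builder above requires the terms to be sorted first (hence the TimSort). A minimal sketch of the underlying prefix-coding idea, not the actual Lucene.NET wire format: with the terms sorted, each entry only stores how many leading chars it shares with its predecessor plus the remaining suffix.

using System;

public static class PrefixCodeDemo
{
    public static void Main()
    {
        string[] sorted = { "apache", "apple", "applet", "lucene" };
        string prev = "";
        foreach (string term in sorted)
        {
            // length of the prefix shared with the previous term
            int prefix = 0;
            int max = Math.Min(prev.Length, term.Length);
            while (prefix < max && prev[prefix] == term[prefix]) prefix++;
            Console.WriteLine($"prefixLen={prefix} suffix=\"{term.Substring(prefix)}\"");
            prev = term;
        }
        // prints: 0 "apache", 2 "ple", 5 "t", 0 "lucene"
    }
}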
Beispiel #16
0
        public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags)
        {
            // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
            // ... and was previously created w/ this MultiTermsEnum:
            if (!(reuse is MultiDocsAndPositionsEnum docsAndPositionsEnum) || !docsAndPositionsEnum.CanReuse(this))
            {
                docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.Length);
            }

            int upto = 0;

            for (int i = 0; i < numTop; i++)
            {
                TermsEnumWithSlice entry = top[i];

                IBits b;

                if (liveDocs is MultiBits multiLiveDocs)
                {
                    // Optimize for common case: requested skip docs is a
                    // congruent sub-slice of MultiBits: in this case, we
                    // just pull the liveDocs from the sub reader, rather
                    // than making the inefficient
                    // Slice(Multi(sub-readers)):
                    MultiBits.SubResult sub = multiLiveDocs.GetMatchingSub(top[i].SubSlice);
                    if (sub.Matches)
                    {
                        b = sub.Result;
                    }
                    else
                    {
                        // custom case: requested skip docs is foreign:
                        // must slice it on every access (very
                        // inefficient)
                        b = new BitsSlice(liveDocs, top[i].SubSlice);
                    }
                }
                else if (liveDocs != null)
                {
                    b = new BitsSlice(liveDocs, top[i].SubSlice);
                }
                else
                {
                    // no deletions
                    b = null;
                }

                if (Debugging.AssertsEnabled) Debugging.Assert(entry.Index < docsAndPositionsEnum.subDocsAndPositionsEnum.Length, "{0} vs {1}; {2}", entry.Index, docsAndPositionsEnum.subDocsAndPositionsEnum.Length, subs.Length);
                DocsAndPositionsEnum subPostings = entry.Terms.DocsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.Index], flags);

                if (subPostings != null)
                {
                    docsAndPositionsEnum.subDocsAndPositionsEnum[entry.Index] = subPostings;
                    subDocsAndPositions[upto].DocsAndPositionsEnum = subPostings;
                    subDocsAndPositions[upto].Slice = entry.SubSlice;
                    upto++;
                }
                else
                {
                    if (entry.Terms.Docs(b, null, DocsFlags.NONE) != null)
                    {
                        // At least one of our subs does not store
                        // offsets or positions -- we can't correctly
                        // produce a MultiDocsAndPositions enum
                        return null;
                    }
                }
            }

            if (upto == 0)
            {
                return null;
            }
            else
            {
                return docsAndPositionsEnum.Reset(subDocsAndPositions, upto);
            }
        }
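
The reuse check at the top of the method is a general enum-recycling pattern: recycle the incoming instance only when it has the right concrete type and was created by this same parent. A minimal sketch with illustrative stand-in types (not Lucene.NET's):

public sealed class PooledEnum
{
    private readonly object parent;
    public PooledEnum(object parent) { this.parent = parent; }
    public bool CanReuse(object p) { return ReferenceEquals(parent, p); }
}

public static class ReuseDemo
{
    public static PooledEnum GetOrCreate(object parent, object reuse)
    {
        // recycle only when the type matches and the owner is the same
        if (reuse is PooledEnum e && e.CanReuse(parent))
        {
            return e;
        }
        return new PooledEnum(parent); // wrong type or foreign owner: allocate fresh
    }
}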
Beispiel #17
0
        /// <exception cref="IOException"/>
        protected override void WriteSkipData(int level, IndexOutput skipBuffer)
        {
            // To efficiently store payloads in the posting lists we do not store the length of
            // every payload. Instead we omit the length for a payload if the previous payload had
            // the same length.
            // However, in order to support skipping, the payload length at every skip point must be known.
            // So we use the same length encoding that we use for the posting lists for the skip data as well:
            // Case 1: current field does not store payloads
            //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
            //           DocSkip,FreqSkip,ProxSkip --> VInt
            //           DocSkip records the document number before every SkipInterval'th document in TermFreqs.
            //           Document numbers are represented as differences from the previous value in the sequence.
            // Case 2: current field stores payloads
            //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
            //           DocSkip,FreqSkip,ProxSkip --> VInt
            //           PayloadLength             --> VInt
            //         In this case DocSkip/2 is the difference between
            //         the current and the previous value. If DocSkip
            //         is odd, then a PayloadLength encoded as VInt follows,
            //         if DocSkip is even, then it is assumed that the
            //         current payload length equals the length at the previous
            //         skip point

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads);
            }

            if (curStorePayloads)
            {
                int delta = curDoc - lastSkipDoc[level];
                if (curPayloadLength == lastSkipPayloadLength[level])
                {
                    // the current payload length equals the length at the previous skip point,
                    // so we don't store the length again
                    skipBuffer.WriteVInt32(delta << 1);
                }
                else
                {
                    // the payload length is different from the previous one. We shift the DocSkip,
                    // set the lowest bit and store the current payload length as VInt.
                    skipBuffer.WriteVInt32(delta << 1 | 1);
                    skipBuffer.WriteVInt32(curPayloadLength);
                    lastSkipPayloadLength[level] = curPayloadLength;
                }
            }
            else
            {
                // current field does not store payloads
                skipBuffer.WriteVInt32(curDoc - lastSkipDoc[level]);
            }

            if (indexOptions != IndexOptions.DOCS_ONLY)
            {
                freqIndex[level].Mark();
                freqIndex[level].Write(skipBuffer, false);
            }
            docIndex[level].Mark();
            docIndex[level].Write(skipBuffer, false);
            if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                posIndex[level].Mark();
                posIndex[level].Write(skipBuffer, false);
                if (curStorePayloads)
                {
                    skipBuffer.WriteVInt32((int)(curPayloadPointer - lastSkipPayloadPointer[level]));
                }
            }

            lastSkipDoc[level]            = curDoc;
            lastSkipPayloadPointer[level] = curPayloadPointer;
        }
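
The low-bit trick described in the comment block is worth seeing in isolation. A minimal sketch, assuming nothing beyond the comment above: the doc delta is shifted left by one and the low bit signals whether a PayloadLength VInt follows; the reader reverses this with a shift and a parity test.

using System;

public static class SkipDatumDemo
{
    public static (int encoded, int? payloadLength) Encode(int docDelta, int curLen, int lastLen)
        => curLen == lastLen
            ? (docDelta << 1, (int?)null)           // even: reuse the previous payload length
            : ((docDelta << 1) | 1, (int?)curLen);  // odd: a PayloadLength VInt follows

    public static void Main()
    {
        var (enc, len) = Encode(docDelta: 5, curLen: 4, lastLen: 3);
        int delta = enc >> 1;                       // recover the doc delta
        bool lengthFollows = (enc & 1) != 0;        // low bit set -> read PayloadLength
        Console.WriteLine($"delta={delta} lengthFollows={lengthFollows} len={len}");
    }
}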
Beispiel #18
0
 public TermsEnumWithSlice(int index, ReaderSlice subSlice)
 {
     this.SubSlice = subSlice;
     this.Index = index;
     if (Debugging.AssertsEnabled) Debugging.Assert(subSlice.Length >= 0, "length={0}", subSlice.Length);
 }
Beispiel #19
0
        private IEnumerable <long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map)
        {
            int          readerUpto      = -1;
            int          docIDUpto       = 0;
            AtomicReader currentReader   = null;
            IBits        currentLiveDocs = null;
            var          ords            = new long[8];
            int          ordUpto         = 0;
            int          ordLength       = 0;

            while (true)
            {
                if (readerUpto == readers.Length)
                {
                    yield break;
                }

                if (ordUpto < ordLength)
                {
                    var value = ords[ordUpto];
                    ordUpto++;
                    yield return(value);

                    continue;
                }

                if (currentReader == null || docIDUpto == currentReader.MaxDoc)
                {
                    readerUpto++;
                    if (readerUpto < readers.Length)
                    {
                        currentReader   = readers[readerUpto];
                        currentLiveDocs = currentReader.LiveDocs;
                    }
                    docIDUpto = 0;
                    continue;
                }

                if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docIDUpto < currentReader.MaxDoc);
                    }
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.SetDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ordLength == ords.Length)
                        {
                            ords = ArrayUtil.Grow(ords, ordLength + 1);
                        }
                        ords[ordLength] = map.GetGlobalOrd(readerUpto, ord);
                        ordLength++;
                    }
                    docIDUpto++;
                    continue;
                }

                docIDUpto++;
            }
        }
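
The enumerable above walks the readers in order and translates each segment-local ord to a global ord through the OrdinalMap. A minimal sketch of that remapping with a naive stand-in for OrdinalMap (just a union of the per-segment term dictionaries):

using System;
using System.Collections.Generic;
using System.Linq;

public static class OrdinalMapDemo
{
    public static void Main()
    {
        string[][] segmentTerms =
        {
            new[] { "apple", "pear" },  // segment 0: local ords 0,1
            new[] { "banana", "pear" }, // segment 1: local ords 0,1
        };

        // merged dictionary, globally sorted: the global ord is the index here
        string[] globalTerms = segmentTerms.SelectMany(t => t)
                                           .Distinct()
                                           .OrderBy(t => t, StringComparer.Ordinal)
                                           .ToArray();
        var globalOrd = new Dictionary<string, int>();
        for (int g = 0; g < globalTerms.Length; g++)
        {
            globalOrd[globalTerms[g]] = g;
        }

        for (int seg = 0; seg < segmentTerms.Length; seg++)
        {
            for (int localOrd = 0; localOrd < segmentTerms[seg].Length; localOrd++)
            {
                // the remapping that map.GetGlobalOrd(readerUpto, ord) performs above
                int mapped = globalOrd[segmentTerms[seg][localOrd]];
                Console.WriteLine($"segment={seg} localOrd={localOrd} -> globalOrd={mapped}");
            }
        }
    }
}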
Beispiel #20
0
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new DisposableThreadLocal <StoredFieldsReader>(()
                                                                               => (StoredFieldsReader)fieldsReaderOrig.Clone());
            termVectorsLocal = new DisposableThreadLocal <TermVectorsReader>(()
                                                                             => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fields != null);
                }
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsProducer != null);
                    }
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
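
The two DisposableThreadLocal fields implement a clone-per-thread pattern: one shared prototype reader is cloned lazily per thread so per-thread read state (file positions, buffers) is never shared. A minimal sketch using the BCL ThreadLocal<T> instead, with an illustrative stand-in type:

using System;
using System.Threading;

public sealed class ProtoReader : ICloneable
{
    public object Clone() { return new ProtoReader(); } // stand-in for StoredFieldsReader.Clone()
}

public static class PerThreadCloneDemo
{
    private static readonly ProtoReader orig = new ProtoReader();

    // each thread lazily clones the shared prototype on first access
    private static readonly ThreadLocal<ProtoReader> local =
        new ThreadLocal<ProtoReader>(() => (ProtoReader)orig.Clone());

    public static ProtoReader Current { get { return local.Value; } }
}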
Beispiel #21
0
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos,
                                   IOContext context)
        {
            var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
            var output   = directory.CreateOutput(fileName, context);
            var scratch  = new BytesRef();
            var success  = false;

            try
            {
                SimpleTextUtil.Write(output, NUMFIELDS);
                SimpleTextUtil.Write(output, infos.Count.ToString(CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(output);

                foreach (FieldInfo fi in infos)
                {
                    SimpleTextUtil.Write(output, NAME);
                    SimpleTextUtil.Write(output, fi.Name, scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NUMBER);
                    SimpleTextUtil.Write(output, fi.Number.ToString(CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, ISINDEXED);
                    SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.IsIndexed.ToString()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    if (fi.IsIndexed)
                    {
                        // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(IndexOptionsComparer.Default.Compare(fi.IndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                        }
                        SimpleTextUtil.Write(output, INDEXOPTIONS);
                        SimpleTextUtil.Write(output,
                                             fi.IndexOptions != IndexOptions.NONE ? fi.IndexOptions.ToString() : string.Empty,
                                             scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }

                    SimpleTextUtil.Write(output, STORETV);
                    SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.HasVectors.ToString()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, PAYLOADS);
                    SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.HasPayloads.ToString()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NORMS);
                    SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower((!fi.OmitsNorms).ToString()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NORMS_TYPE);
                    SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES);
                    SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES_GEN);
                    SimpleTextUtil.Write(output, fi.DocValuesGen.ToString(CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    IDictionary <string, string> atts = fi.Attributes;
                    int numAtts = atts is null ? 0 : atts.Count;
                    SimpleTextUtil.Write(output, NUM_ATTS);
                    SimpleTextUtil.Write(output, numAtts.ToString(CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    if (numAtts <= 0 || atts is null)
                    {
                        continue;
                    }
                    foreach (var entry in atts)
                    {
                        SimpleTextUtil.Write(output, ATT_KEY);
                        SimpleTextUtil.Write(output, entry.Key, scratch);
                        SimpleTextUtil.WriteNewline(output);

                        SimpleTextUtil.Write(output, ATT_VALUE);
                        SimpleTextUtil.Write(output, entry.Value, scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }
                }
                SimpleTextUtil.WriteChecksum(output, scratch);
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
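
The finally block above is the success-flag disposal idiom: dispose normally on the happy path, but suppress secondary failures on the error path so the original exception propagates (the role IOUtils.DisposeWhileHandlingException plays in Lucene.NET). A minimal sketch with a plain FileStream:

using System;
using System.IO;

public static class SuccessFlagDemo
{
    public static void WriteAll(string path, byte[] data)
    {
        var output = new FileStream(path, FileMode.Create);
        bool success = false;
        try
        {
            output.Write(data, 0, data.Length);
            success = true;
        }
        finally
        {
            if (success)
            {
                output.Dispose();             // let any dispose failure surface
            }
            else
            {
                try { output.Dispose(); }     // best effort: keep the original exception
                catch { /* suppressed */ }
            }
        }
    }
}

Beispiel #22
0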
        public override ThreadState GetAndLock(Thread requestingThread, DocumentsWriter documentsWriter)
        {
            ThreadState threadState = null;

            if (NumThreadStatesActive == 0)
            {
                lock (this)
                {
                    if (NumThreadStatesActive == 0)
                    {
                        threadState = states[0] = NewThreadState();
                        return(threadState);
                    }
                }
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(NumThreadStatesActive > 0);
            }
            for (int i = 0; i < maxRetry; i++)
            {
                int ord = random.Next(NumThreadStatesActive);
                lock (this)
                {
                    threadState = states[ord];
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(threadState != null);
                    }
                }

                if (threadState.TryLock())
                {
                    return(threadState);
                }
                if (random.Next(20) == 0)
                {
                    break;
                }
            }

            /*
             * Only try to create a new ThreadState if we cannot lock the randomly
             * selected state. This is important since some tests rely on a single
             * ThreadState in the single-threaded case. Eventually it would be nice if
             * we did not have this limitation, but for now we just make sure we only
             * allocate one ThreadState if indexing is single-threaded.
             */

            lock (this)
            {
                ThreadState newThreadState = NewThreadState();
                if (newThreadState != null) // did we get a new state?
                {
                    threadState = states[NumThreadStatesActive - 1] = newThreadState;
                    //if (Debugging.AssertsEnabled) Debugging.Assert(threadState.HeldByCurrentThread);
                    return(threadState);
                }
                // if no new state is available lock the random one
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(threadState != null);
            }
            threadState.@Lock();
            return(threadState);
        }
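
GetAndLock's strategy is a non-blocking pool probe: try a few random slots with TryLock, and only block as a last resort. A minimal sketch using Monitor.TryEnter on plain objects instead of Lucene.NET's ThreadState (the caller must release the returned slot with Monitor.Exit):

using System;
using System.Threading;

public static class TryLockPoolDemo
{
    private static readonly object[] states = { new object(), new object(), new object() };
    private static readonly Random random = new Random(); // a real pool would need a thread-safe RNG

    // returns a locked slot; the caller must pair this with Monitor.Exit
    public static object GetAndLock(int maxRetry = 5)
    {
        for (int i = 0; i < maxRetry; i++)
        {
            object s = states[random.Next(states.Length)];
            if (Monitor.TryEnter(s))
            {
                return s;            // uncontended: take it without blocking
            }
        }
        object fallback = states[0];
        Monitor.Enter(fallback);     // everything contended: block on one slot
        return fallback;
    }
}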
Beispiel #23
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            Queue <SortedInt32Set.FrozenInt32Set> worklist = new Queue <SortedInt32Set.FrozenInt32Set>(); // LUCENENET specific - Queue is much more performant than LinkedList
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.Enqueue(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.Dequeue();
                //worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(lastPoint != -1);
                        }

                        statesSet.ComputeHash();

                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out State q) || q == null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.Enqueue(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                // LUCENENET: Resize rather than copy
                                Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert((accCount > 0) == q.accept, "accCount={0} vs existing accept={1} states={2}", accCount, q.accept, statesSet);
                            }
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(statesSet.upto == 0, "upto={0}", statesSet.upto);
                }
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
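
Determinize is the classic subset construction, plus Lucene's extra machinery for collating UTF-32 transition ranges into sorted points. A minimal sketch of just the powerset part over a tiny NFA (none of these types are Lucene.NET's): each DFA state is a set of NFA states, and the worklist drains until no new subsets appear.

using System;
using System.Collections.Generic;

public static class DeterminizeDemo
{
    public static void Main()
    {
        // NFA over {a,b}: transitions[state][label] = set of target states.
        var transitions = new Dictionary<int, Dictionary<char, int[]>>
        {
            [0] = new Dictionary<char, int[]> { ['a'] = new[] { 0, 1 } }, // 'a' loops on 0 and enters 1
            [1] = new Dictionary<char, int[]> { ['b'] = new[] { 2 } },
            [2] = new Dictionary<char, int[]>(),
        };
        var accept = new HashSet<int> { 2 };

        var start = new SortedSet<int> { 0 };
        var seen = new Dictionary<string, SortedSet<int>> { [Key(start)] = start };
        var worklist = new Queue<SortedSet<int>>();
        worklist.Enqueue(start);

        while (worklist.Count > 0)                    // drain until no new subsets appear
        {
            SortedSet<int> s = worklist.Dequeue();

            // collate outgoing transitions from every NFA state in the subset
            var byLabel = new Dictionary<char, SortedSet<int>>();
            foreach (int q in s)
            {
                foreach (KeyValuePair<char, int[]> t in transitions[q])
                {
                    if (!byLabel.TryGetValue(t.Key, out SortedSet<int> set))
                    {
                        byLabel[t.Key] = set = new SortedSet<int>();
                    }
                    set.UnionWith(t.Value);
                }
            }

            foreach (KeyValuePair<char, SortedSet<int>> e in byLabel)
            {
                if (!seen.ContainsKey(Key(e.Value)))
                {
                    seen[Key(e.Value)] = e.Value;     // a brand new DFA state
                    worklist.Enqueue(e.Value);
                }
                bool acc = e.Value.Overlaps(accept);  // accepting iff any member accepts
                Console.WriteLine($"{{{Key(s)}}} --{e.Key}--> {{{Key(e.Value)}}} accept={acc}");
            }
        }
    }

    private static string Key(SortedSet<int> s) => string.Join(",", s);
}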
Beispiel #24
0
            /// <summary>
            /// Builds an <see cref="SynonymMap"/> and returns it.
            /// </summary>
            public virtual SynonymMap Build()
            {
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
                // TODO: are we using the best sharing options?
                var builder = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

                BytesRef            scratch       = new BytesRef(64);
                ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

                ISet <int> dedupSet;

                if (dedup)
                {
                    dedupSet = new JCG.HashSet <int>();
                }
                else
                {
                    dedupSet = null;
                }


                var spare = new byte[5];

                ICollection <CharsRef> keys = workingSet.Keys;

                CharsRef[] sortedKeys = new CharsRef[keys.Count];
                keys.CopyTo(sortedKeys, 0);
#pragma warning disable 612, 618
                System.Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparer);
#pragma warning restore 612, 618


                Int32sRef scratchIntsRef = new Int32sRef();

                //System.out.println("fmap.build");
                for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
                {
                    CharsRef input  = sortedKeys[keyIdx];
                    MapEntry output = workingSet[input];

                    int numEntries = output.ords.Count;
                    // output size, assume the worst case
                    int estimatedSize = 5 + numEntries * 5; // up to 5 bytes for the count plus up to 5 bytes per VInt-encoded ord

                    scratch.Grow(estimatedSize);
                    scratchOutput.Reset(scratch.Bytes, scratch.Offset, scratch.Bytes.Length);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(scratch.Offset == 0);
                    }

                    // now write our output data:
                    int count = 0;
                    for (int i = 0; i < numEntries; i++)
                    {
                        if (dedupSet != null)
                        {
                            // LUCENENET specific - no boxing happening here
                            int ent = output.ords[i];
                            if (dedupSet.Contains(ent))
                            {
                                continue;
                            }
                            dedupSet.Add(ent);
                        }
                        scratchOutput.WriteVInt32(output.ords[i]);
                        count++;
                    }

                    int pos = scratchOutput.Position;
                    scratchOutput.WriteVInt32(count << 1 | (output.includeOrig ? 0 : 1));
                    int pos2    = scratchOutput.Position;
                    int vIntLen = pos2 - pos;

                    // Move the count + includeOrig to the front of the byte[]:
                    Array.Copy(scratch.Bytes, pos, spare, 0, vIntLen);
                    Array.Copy(scratch.Bytes, 0, scratch.Bytes, vIntLen, pos);
                    Array.Copy(spare, 0, scratch.Bytes, 0, vIntLen);

                    if (dedupSet != null)
                    {
                        dedupSet.Clear();
                    }

                    scratch.Length = scratchOutput.Position - scratch.Offset;
                    //System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
                    builder.Add(Lucene.Net.Util.Fst.Util.ToUTF32(input.ToString(), scratchIntsRef), BytesRef.DeepCopyOf(scratch));
                }

                FST <BytesRef> fst = builder.Finish();
                return(new SynonymMap(fst, words, maxHorizontalContext));
            }
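
The three Array.Copy calls near the end of Build implement a write-body-first, rotate-header-to-front trick: the count is only known after the entries are written, so it is appended and then moved so decoders can read the header first. A minimal sketch with illustrative buffer sizes:

using System;

public static class HeaderMoveDemo
{
    public static void Main()
    {
        byte[] buffer = new byte[16];
        int pos = 0;
        foreach (byte ord in new byte[] { 7, 9, 12 }) // body: one byte per "ord"
            buffer[pos++] = ord;
        byte header = 3;                              // the count, known only now
        buffer[pos] = header;

        byte[] spare = new byte[1];
        Array.Copy(buffer, pos, spare, 0, 1);         // save the header
        Array.Copy(buffer, 0, buffer, 1, pos);        // shift the body right (Array.Copy handles overlap)
        Array.Copy(spare, 0, buffer, 0, 1);           // header now leads

        Console.WriteLine(string.Join(" ", buffer[0], buffer[1], buffer[2], buffer[3]));
        // prints: 3 7 9 12
    }
}

Beispiel #25
0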
        public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!UninterruptableMonitor.IsEntered(writer));
                }

                this.m_writer = writer;

                InitMergeThreadPriority();

                m_dir = writer.Directory;

                // First, quickly run through the newly proposed merges
                // and add any orthogonal merges (ie a merge not
                // involving segments already pending to be merged) to
                // the queue.  If we are way behind on merging, many of
                // these newly proposed merges will likely already be
                // registered.

                if (IsVerbose)
                {
                    Message("now merge");
                    Message("  index: " + writer.SegString());
                }

                // Iterate, pulling from the IndexWriter's queue of
                // pending merges, until it's empty:
                while (true)
                {
                    long startStallTime = 0;
                    while (writer.HasPendingMerges() && MergeThreadCount >= maxMergeCount)
                    {
                        // this means merging has fallen too far behind: we
                        // have already created maxMergeCount threads, and
                        // now there's at least one more merge pending.
                        // Note that only maxThreadCount of
                        // those created merge threads will actually be
                        // running; the rest will be paused (see
                        // updateMergeThreads).  We stall this producer
                        // thread to prevent creation of new segments,
                        // until merging has caught up:
                        startStallTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                        if (IsVerbose)
                        {
                            Message("    too many merges; stalling...");
                        }
                        try
                        {
                            UninterruptableMonitor.Wait(this);
                        }
                        catch (Exception ie) when(ie.IsInterruptedException())
                        {
                            throw new Util.ThreadInterruptedException(ie);
                        }
                    }

                    if (IsVerbose)
                    {
                        if (startStallTime != 0)
                        {
                            Message("  stalled for " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startStallTime) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                        }
                    }

                    MergePolicy.OneMerge merge = writer.NextMerge();
                    if (merge == null)
                    {
                        if (IsVerbose)
                        {
                            Message("  no more merges pending; now return");
                        }
                        return;
                    }

                    bool success = false;
                    try
                    {
                        if (IsVerbose)
                        {
                            Message("  consider merge " + writer.SegString(merge.Segments));
                        }

                        // OK to spawn a new merge thread to handle this
                        // merge:
                        MergeThread merger = GetMergeThread(writer, merge);
                        m_mergeThreads.Add(merger);
                        if (IsVerbose)
                        {
                            Message("    launch new thread [" + merger.Name + "]");
                        }

                        merger.Start();

                        // Must call this after starting the thread else
                        // the new thread is removed from mergeThreads
                        // (since it's not alive yet):
                        UpdateMergeThreads();

                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            writer.MergeFinish(merge);
                        }
                    }
                }
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Beispiel #26
0
        /// <summary>
        /// Compute the intersection of the provided sets. This method is much faster than
        /// computing the intersection manually since it operates directly at the byte level.
        /// </summary>
        public static WAH8DocIdSet Intersect(ICollection <WAH8DocIdSet> docIdSets, int indexInterval)
        {
            // LUCENENET: Added guard clause for null
            if (docIdSets is null)
            {
                throw new ArgumentNullException(nameof(docIdSets));
            }

            switch (docIdSets.Count)
            {
            case 0:
                throw new ArgumentException("There must be at least one set to intersect");

            case 1:
                return(docIdSets.First());
            }
            // The logic below is similar to ConjunctionScorer
            int numSets   = docIdSets.Count;
            var iterators = new Iterator[numSets];
            int i         = 0;

            foreach (WAH8DocIdSet set in docIdSets)
            {
                var it = (Iterator)set.GetIterator();
                iterators[i++] = it;
            }
            Array.Sort(iterators, SERIALIZED_LENGTH_COMPARER);
            WordBuilder builder = (WordBuilder)(new WordBuilder()).SetIndexInterval(indexInterval);
            int         wordNum = 0;

            while (true)
            {
                // Advance the least costly iterator first
                iterators[0].AdvanceWord(wordNum);
                wordNum = iterators[0].wordNum;
                if (wordNum == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                byte word = iterators[0].word;
                for (i = 1; i < numSets; ++i)
                {
                    if (iterators[i].wordNum < wordNum)
                    {
                        iterators[i].AdvanceWord(wordNum);
                    }
                    if (iterators[i].wordNum > wordNum)
                    {
                        wordNum = iterators[i].wordNum;
                        goto mainContinue;
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(iterators[i].wordNum == wordNum);
                    }
                    word &= iterators[i].word;
                    if (word == 0)
                    {
                        // There are common words, but they don't share any bit
                        ++wordNum;
                        goto mainContinue;
                    }
                }
                // Found a common word
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(word != 0);
                }
                builder.AddWord(wordNum, word);
                ++wordNum;
                mainContinue: ;
            }
            //mainBreak:
            return(builder.Build());
        }
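
Intersect is a leapfrog join at the granularity of compressed 8-bit words. A minimal sketch of the same dance over plain sorted int arrays: advance every lagging cursor to the candidate, let any overshoot become the new candidate, and emit on full agreement.

using System;
using System.Collections.Generic;

public static class LeapfrogDemo
{
    public static IEnumerable<int> Intersect(params int[][] sets)
    {
        var pos = new int[sets.Length];
        int candidate = 0;
        while (true)
        {
            bool agreed = true;
            for (int i = 0; i < sets.Length; i++)
            {
                while (pos[i] < sets[i].Length && sets[i][pos[i]] < candidate) pos[i]++;
                if (pos[i] == sets[i].Length) yield break;   // one set exhausted: done
                if (sets[i][pos[i]] > candidate)
                {
                    candidate = sets[i][pos[i]];             // overshoot: new candidate
                    agreed = false;
                    break;
                }
            }
            if (agreed)
            {
                yield return candidate;                      // all cursors agree
                candidate++;                                 // move past the match
            }
        }
    }

    public static void Main()
    {
        // prints: 3,5
        Console.WriteLine(string.Join(",", Intersect(new[] { 1, 3, 5, 7 }, new[] { 3, 4, 5, 9 })));
    }
}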
Beispiel #27
0
        /// <summary>
        /// Given a <paramref name="target"/> value, advance the decoding index to the first bigger or equal value
        /// and return it if it is available. Otherwise return <see cref="NO_MORE_VALUES"/>.
        /// <para/>
        /// The current implementation uses the index on the upper zero bit positions.
        /// </summary>
        public virtual long AdvanceToValue(long target)
        {
            efIndex += 1;
            if (efIndex >= numEncoded)
            {
                return(NO_MORE_VALUES);
            }
            setBitForIndex += 1; // the high bit at setBitForIndex belongs to the unary code for efIndex

            int  highIndex = (int)((long)((ulong)setBitForIndex >> LOG2_INT64_SIZE));
            long upperLong = efEncoder.upperLongs[highIndex];

            curHighLong = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1)))); // may contain the unary 1 bit for efIndex

            // determine index entry to advance to
            long highTarget = (long)((ulong)target >> efEncoder.numLowBits);

            long indexEntryIndex = (highTarget / efEncoder.indexInterval) - 1;

            if (indexEntryIndex >= 0) // not before first index entry
            {
                if (indexEntryIndex >= numIndexEntries)
                {
                    indexEntryIndex = numIndexEntries - 1; // no further than last index entry
                }
                long indexHighValue = (indexEntryIndex + 1) * efEncoder.indexInterval;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(indexHighValue <= highTarget);
                }
                if (indexHighValue > (setBitForIndex - efIndex)) // advance to just after zero bit position of index entry.
                {
                    setBitForIndex = UnPackValue(efEncoder.upperZeroBitPositionIndex, efEncoder.nIndexEntryBits, indexEntryIndex, indexMask);
                    efIndex        = setBitForIndex - indexHighValue; // the high bit at setBitForIndex belongs to the unary code for efIndex
                    highIndex      = (int)(((ulong)setBitForIndex >> LOG2_INT64_SIZE));
                    upperLong      = efEncoder.upperLongs[highIndex];
                    curHighLong    = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1)))); // may contain the unary 1 bit for efIndex
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(efIndex < numEncoded);                           // there is a high value to be found.
                }
            }

            int curSetBits   = curHighLong.PopCount();                                                               // shifted right.
            int curClearBits = (sizeof(long) * 8) - curSetBits - ((int)(setBitForIndex & ((sizeof(long) * 8) - 1))); // subtract right shift, may be more than encoded

            while (((setBitForIndex - efIndex) + curClearBits) < highTarget)
            {
                // curHighLong has not enough clear bits to reach highTarget
                efIndex += curSetBits;
                if (efIndex >= numEncoded)
                {
                    return(NO_MORE_VALUES);
                }
                setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1));
                // highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert((highIndex + 1) == (int)((long)((ulong)setBitForIndex >> LOG2_INT64_SIZE)));
                }
                highIndex   += 1;
                upperLong    = efEncoder.upperLongs[highIndex];
                curHighLong  = upperLong;
                curSetBits   = curHighLong.PopCount();
                curClearBits = (sizeof(long) * 8) - curSetBits;
            }
            // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.
            while (curHighLong == 0L)
            {
                setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1));
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE));
                }
                highIndex  += 1;
                upperLong   = efEncoder.upperLongs[highIndex];
                curHighLong = upperLong;
            }

            // curHighLong has enough clear bits to reach highTarget, has at least 1 set bit, and may not have enough set bits.
            int rank = (int)(highTarget - (setBitForIndex - efIndex)); // the rank of the zero bit for highValue.

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((rank <= (sizeof(long) * 8)), "rank {0}", rank);
            }
            if (rank >= 1)
            {
                long invCurHighLong   = ~curHighLong;
                int  clearBitForValue = (rank <= 8) ? BroadWord.SelectNaive(invCurHighLong, rank) : BroadWord.Select(invCurHighLong, rank);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(clearBitForValue <= ((sizeof(long) * 8) - 1));
                }
                setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
                int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
                efIndex += oneBitsBeforeClearBit;       // the high bit at setBitForIndex belongs to the unary code for efIndex
                if (efIndex >= numEncoded)
                {
                    return(NO_MORE_VALUES);
                }

                if ((setBitForIndex & ((sizeof(long) * 8) - 1)) == 0L) // exhausted curHighLong
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE));
                    }
                    highIndex  += 1;
                    upperLong   = efEncoder.upperLongs[highIndex];
                    curHighLong = upperLong;
                }
                else
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(highIndex == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE));
                    }
                    curHighLong = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1))));
                }
                // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits.

                while (curHighLong == 0L)
                {
                    setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1));
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE));
                    }
                    highIndex  += 1;
                    upperLong   = efEncoder.upperLongs[highIndex];
                    curHighLong = upperLong;
                }
            }
            setBitForIndex += curHighLong.TrailingZeroCount();
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((setBitForIndex - efIndex) >= highTarget);                           // highTarget reached
            }
            // Linear search also with low values
            long currentValue = CombineHighLowValues((setBitForIndex - efIndex), CurrentLowValue());

            while (currentValue < target)
            {
                currentValue = NextValue();
                if (currentValue == NO_MORE_VALUES)
                {
                    return(NO_MORE_VALUES);
                }
            }
            return(currentValue);
        }
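
AdvanceToValue navigates an Elias-Fano encoding: each value is split into numLowBits low bits stored packed, and a high part stored in unary in the upper-bits longs that the index entries skip across. A minimal sketch of just the split (parameters are illustrative; the encoder derives numLowBits from the data):

using System;

public static class EliasFanoDemo
{
    public static void Main()
    {
        const int numLowBits = 2;
        long[] values = { 5, 8, 8, 15, 32 };            // must be non-decreasing

        foreach (long v in values)
        {
            long high = (long)((ulong)v >> numLowBits); // selects the unary bucket in the upper-bits stream
            long low = v & ((1L << numLowBits) - 1);    // stored verbatim in the packed low-bits stream
            Console.WriteLine($"v={v} high={high} low={low}");
        }
    }
}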
Beispiel #28
0
            internal virtual void AddWord(int wordNum, byte word)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(wordNum > lastWordNum);
                    Debugging.Assert(word != 0);
                }

                if (!reverse)
                {
                    if (lastWordNum == -1)
                    {
                        clean = 2 + wordNum; // special case for the 1st sequence
                        dirtyWords.WriteByte(word);
                    }
                    else
                    {
                        switch (wordNum - lastWordNum)
                        {
                        case 1:
                            if (word == 0xFF && (byte)dirtyWords.Bytes[dirtyWords.Length - 1] == 0xFF)
                            {
                                --dirtyWords.Length;
                                WriteSequence();
                                reverse = true;
                                clean   = 2;
                            }
                            else
                            {
                                dirtyWords.WriteByte(word);
                            }
                            break;

                        case 2:
                            dirtyWords.WriteByte(0);
                            dirtyWords.WriteByte(word);
                            break;

                        default:
                            WriteSequence();
                            clean = wordNum - lastWordNum - 1;
                            dirtyWords.WriteByte(word);
                            break;
                        }
                    }
                }
                else
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(lastWordNum >= 0);
                    }
                    switch (wordNum - lastWordNum)
                    {
                    case 1:
                        if (word == 0xFF)
                        {
                            if (dirtyWords.Length == 0)
                            {
                                ++clean;
                            }
                            else if ((byte)dirtyWords.Bytes[dirtyWords.Length - 1] == 0xFF)
                            {
                                --dirtyWords.Length;
                                WriteSequence();
                                clean = 2;
                            }
                            else
                            {
                                dirtyWords.WriteByte(word);
                            }
                        }
                        else
                        {
                            dirtyWords.WriteByte(word);
                        }
                        break;

                    case 2:
                        dirtyWords.WriteByte(0);
                        dirtyWords.WriteByte(word);
                        break;

                    default:
                        WriteSequence();
                        reverse = false;
                        clean   = wordNum - lastWordNum - 1;
                        dirtyWords.WriteByte(word);
                        break;
                    }
                }
                lastWordNum  = wordNum;
                cardinality += BitUtil.BitCount(word);
            }
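AddWord above grows one sequence at a time: a run of clean words (all zeros, or all ones once reverse is set) followed by dirty bytes, flushed by WriteSequence when the gap to the next non-empty word is too large. A small worked trace, with purely illustrative inputs, of the forward (!reverse) path:

// Illustrative trace (hypothetical word numbers and byte values):
//
//   AddWord(0, 0x01)  // lastWordNum == -1: clean = 2 + 0, and 0x01 becomes
//                     // the first dirty byte of the pending sequence
//   AddWord(1, 0x80)  // gap of 1, word != 0xFF: 0x80 appended as dirty
//   AddWord(2, 0x04)  // gap of 1 again: 0x04 appended as dirty
//   AddWord(6, 0x10)  // gap of 4: WriteSequence() flushes the pending
//                     // sequence, then clean = 6 - 2 - 1 = 3 and 0x10
//                     // starts the dirty bytes of the next sequence
//
// The 0xFF special case converts a trailing run of all-one words into a
// reverse clean run instead of storing each 0xFF as a dirty byte.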
        internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state)
        {
            if (!fieldInfo.IsIndexed)
            {
                return; // nothing to flush, don't bother the codec with the unindexed field
            }

            TermsConsumer        termsConsumer = consumer.AddField(fieldInfo);
            IComparer <BytesRef> termComp      = termsConsumer.Comparer;

            // CONFUSING: this.indexOptions holds the index options
            // that were current when we first saw this field.  But
            // it's possible this has changed, e.g., when other
            // documents are indexed that cause a "downgrade" of the
            // IndexOptions.  So we must decode the in-RAM buffer
            // according to this.indexOptions, but then write the
            // new segment to the directory according to
            // currentFieldIndexOptions:
            IndexOptions currentFieldIndexOptions = fieldInfo.IndexOptions;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(currentFieldIndexOptions != IndexOptions.NONE);
            }

            // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
            bool writeTermFreq  = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS) >= 0;
            bool writePositions = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            bool writeOffsets   = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

            bool readTermFreq  = this.hasFreq;
            bool readPositions = this.hasProx;
            bool readOffsets   = this.hasOffsets;

            //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);

            // Make sure FieldInfo.update is working correctly:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(!writeTermFreq || readTermFreq);
                Debugging.Assert(!writePositions || readPositions);
                Debugging.Assert(!writeOffsets || readOffsets);

                Debugging.Assert(!writeOffsets || writePositions);
            }

            IDictionary <Term, int?> segDeletes;

            if (state.SegUpdates != null && state.SegUpdates.terms.Count > 0)
            {
                segDeletes = state.SegUpdates.terms;
            }
            else
            {
                segDeletes = null;
            }

            int[]    termIDs  = termsHashPerField.SortPostings(termComp);
            int      numTerms = termsHashPerField.bytesHash.Count;
            BytesRef text     = new BytesRef();
            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;
            ByteSliceReader       freq     = new ByteSliceReader();
            ByteSliceReader       prox     = new ByteSliceReader();

            FixedBitSet visitedDocs      = new FixedBitSet(state.SegmentInfo.DocCount);
            long        sumTotalTermFreq = 0;
            long        sumDocFreq       = 0;

            Term protoTerm = new Term(fieldName);

            for (int i = 0; i < numTerms; i++)
            {
                int termID = termIDs[i];
                // Get BytesRef
                int textStart = postings.textStarts[termID];
                termsHashPerField.bytePool.SetBytesRef(text, textStart);

                termsHashPerField.InitReader(freq, termID, 0);
                if (readPositions || readOffsets)
                {
                    termsHashPerField.InitReader(prox, termID, 1);
                }

                // TODO: really TermsHashPerField should take over most
                // of this loop, including merge sort of terms from
                // multiple threads and interacting with the
                // TermsConsumer, only calling out to us (passing us the
                // DocsConsumer) to handle delivery of docs/positions

                PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text);

                int? delDocLimit;
                if (segDeletes != null)
                {
                    protoTerm.Bytes = text;
                    if (segDeletes.TryGetValue(protoTerm, out int? docIDUpto) && docIDUpto != null)
                    {
                        delDocLimit = docIDUpto;
                    }
                    else
                    {
                        delDocLimit = 0;
                    }
                }
                else
                {
                    delDocLimit = 0;
                }

                // Walk this term's in-RAM postings, delivering each
                // doc (and its freq/positions/offsets, as configured
                // above) to the codec's PostingsConsumer.
                int  docFreq       = 0;
                long totalTermFreq = 0;
                int  docID         = 0;

                while (true)
                {
                    //System.out.println("  cycle");
                    int termFreq;
                    if (freq.Eof())
                    {
                        if (postings.lastDocCodes[termID] != -1)
                        {
                            // Return last doc
                            docID = postings.lastDocIDs[termID];
                            if (readTermFreq)
                            {
                                termFreq = postings.termFreqs[termID];
                            }
                            else
                            {
                                termFreq = -1;
                            }
                            postings.lastDocCodes[termID] = -1;
                        }
                        else
                        {
                            // EOF
                            break;
                        }
                    }
                    else
                    {
                        int code = freq.ReadVInt32();
                        if (!readTermFreq)
                        {
                            docID   += code;
                            termFreq = -1;
                        }
                        else
                        {
                            docID += code.TripleShift(1);
                            if ((code & 1) != 0)
                            {
                                termFreq = 1;
                            }
                            else
                            {
                                termFreq = freq.ReadVInt32();
                            }
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docID != postings.lastDocIDs[termID]);
                        }
                    }

                    docFreq++;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docID < state.SegmentInfo.DocCount, "doc={0} maxDoc={1}", docID, state.SegmentInfo.DocCount);
                    }

                    // NOTE: we could check here if the docID was
                    // deleted, and skip it.  However, this is somewhat
                    // dangerous because it can yield non-deterministic
                    // behavior since we may see the docID before we see
                    // the term that caused it to be deleted.  this
                    // would mean some (but not all) of its postings may
                    // make it into the index, which'd alter the docFreq
                    // for those terms.  We could fix this by doing two
                    // passes, ie first sweep marks all del docs, and
                    // 2nd sweep does the real flush, but I suspect
                    // that'd add too much time to flush.
                    visitedDocs.Set(docID);
                    postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1);
                    if (docID < delDocLimit)
                    {
                        // Mark it deleted.  TODO: we could also skip
                        // writing its postings; this would be
                        // deterministic (just for this Term's docs).

                        // TODO: can we do this reach-around in a cleaner way????
                        if (state.LiveDocs == null)
                        {
                            state.LiveDocs = docState.docWriter.codec.LiveDocsFormat.NewLiveDocs(state.SegmentInfo.DocCount);
                        }
                        if (state.LiveDocs.Get(docID))
                        {
                            state.DelCountOnFlush++;
                            state.LiveDocs.Clear(docID);
                        }
                    }

                    totalTermFreq += termFreq;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.

                    if (readPositions || readOffsets)
                    {
                        // we did record positions (& maybe payload) and/or offsets
                        int position = 0;
                        int offset   = 0;
                        for (int j = 0; j < termFreq; j++)
                        {
                            BytesRef thisPayload;

                            if (readPositions)
                            {
                                int code = prox.ReadVInt32();
                                position += code.TripleShift(1);

                                if ((code & 1) != 0)
                                {
                                    // this position has a payload
                                    int payloadLength = prox.ReadVInt32();

                                    if (payload == null)
                                    {
                                        payload       = new BytesRef();
                                        payload.Bytes = new byte[payloadLength];
                                    }
                                    else if (payload.Bytes.Length < payloadLength)
                                    {
                                        payload.Grow(payloadLength);
                                    }

                                    prox.ReadBytes(payload.Bytes, 0, payloadLength);
                                    payload.Length = payloadLength;
                                    thisPayload    = payload;
                                }
                                else
                                {
                                    thisPayload = null;
                                }

                                if (readOffsets)
                                {
                                    int startOffset = offset + prox.ReadVInt32();
                                    int endOffset   = startOffset + prox.ReadVInt32();
                                    if (writePositions)
                                    {
                                        if (writeOffsets)
                                        {
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset={0},endOffset={1},offset={2}", startOffset, endOffset, offset);
                                            }
                                            postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset);
                                        }
                                        else
                                        {
                                            postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                        }
                                    }
                                    offset = startOffset;
                                }
                                else if (writePositions)
                                {
                                    postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                }
                            }
                        }
                    }
                    postingsConsumer.FinishDoc();
                }
                termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
                sumTotalTermFreq += totalTermFreq;
                sumDocFreq       += docFreq;
            }

            termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality());
        }
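The decode loop above reads the in-RAM postings format: each document's vInt code carries the docID delta in its high bits, with the low bit flagging the common termFreq == 1 case (otherwise a second vInt holds the frequency). A hedged sketch of that packing as standalone helpers; the names are ours, and the real data lives in byte slices written by the indexing chain:

static int PackDocCode(int docDelta, int termFreq)
{
    // Low bit set means "freq == 1", so the common case needs no second vInt.
    return termFreq == 1 ? (docDelta << 1) | 1 : docDelta << 1;
}

static void UnpackDocCode(int code, out int docDelta, out bool freqIsOne)
{
    docDelta  = (int)((uint)code >> 1); // same as code.TripleShift(1) above
    freqIsOne = (code & 1) != 0;
}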
Example #30
            /// <summary>
            /// Build a new <see cref="WAH8DocIdSet"/>. </summary>
            public virtual WAH8DocIdSet Build()
            {
                if (cardinality == 0)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(lastWordNum == -1);
                    }
                    return EMPTY;
                }
                WriteSequence();
                byte[] data = Arrays.CopyOf(@out.Bytes, @out.Length);

                // Now build the index
                int valueCount = (numSequences - 1) / indexInterval + 1;
                MonotonicAppendingInt64Buffer indexPositions, indexWordNums;

                if (valueCount <= 1)
                {
                    indexPositions = indexWordNums = SINGLE_ZERO_BUFFER;
                }
                else
                {
                    const int pageSize         = 128;
                    int       initialPageCount = (valueCount + pageSize - 1) / pageSize;
                    MonotonicAppendingInt64Buffer positions = new MonotonicAppendingInt64Buffer(initialPageCount, pageSize, PackedInt32s.COMPACT);
                    MonotonicAppendingInt64Buffer wordNums  = new MonotonicAppendingInt64Buffer(initialPageCount, pageSize, PackedInt32s.COMPACT);

                    positions.Add(0L);
                    wordNums.Add(0L);
                    Iterator it = new Iterator(data, cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(it.@in.Position == 0);
                        Debugging.Assert(it.wordNum == -1);
                    }
                    for (int i = 1; i < valueCount; ++i)
                    {
                        // skip indexInterval sequences
                        for (int j = 0; j < indexInterval; ++j)
                        {
                            bool readSequence = it.ReadSequence();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(readSequence);
                            }
                            it.SkipDirtyBytes();
                        }
                        int position = it.@in.Position;
                        int wordNum  = it.wordNum;
                        positions.Add(position);
                        wordNums.Add(wordNum + 1);
                    }
                    positions.Freeze();
                    wordNums.Freeze();
                    indexPositions = positions;
                    indexWordNums  = wordNums;
                }

                return new WAH8DocIdSet(data, cardinality, indexInterval, indexPositions, indexWordNums);
            }
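The two monotonic buffers built above form a sparse index: one entry per indexInterval sequences, recording the byte position where the sequence starts and the word number it begins at, so an iterator can jump close to a target instead of scanning from the start. A hedged sketch of the lookup an advance could perform; the helper is hypothetical and assumes the buffer's Get(index) accessor:

static long FindIndexSlot(MonotonicAppendingInt64Buffer indexWordNums, long valueCount, long targetWordNum)
{
    // Binary search for the last index entry whose first word number is
    // <= targetWordNum; the matching indexPositions entry is where the
    // iterator would seek before scanning forward sequence by sequence.
    long lo = 0, hi = valueCount - 1, slot = 0;
    while (lo <= hi)
    {
        long mid = (lo + hi) >> 1;
        if (indexWordNums.Get(mid) <= targetWordNum)
        {
            slot = mid;
            lo   = mid + 1;
        }
        else
        {
            hi = mid - 1;
        }
    }
    return slot;
}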
Example #31
 public skyetek_hid(string path)
 {
     MyDebugging = new Debugging(); //  For viewing results of API calls via Debug.Write.
     MyDeviceManagement = new DeviceManagement();
     MyHid = new Hid();
     readBuffer = new Queue<byte>(65);
     ReadFinished = false;
     ReadStart = 0;
     myDevicePathName = path;
 }
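A hypothetical usage sketch for the constructor above; the path placeholder is not a real HID device path and would normally come from device enumeration (e.g., via the DeviceManagement helper):

// Hypothetical: devicePath is discovered at runtime, not hard-coded.
string devicePath = "<hid-device-path-from-enumeration>";
skyetek_hid device = new skyetek_hid(devicePath);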
Example #32
        private void CorProcess_OnBreakpoint(object sender, Debugging.CorDebug.CorBreakpointEventArgs e)
        {
            // Exploratory code (disabled): resolve the active thread and its
            // source position so an O2MDbg break event can be raised with the
            // file name and line. e.Thread doesn't expose what's needed, so
            // the current thread would have to be found via this.ActiveProcess.Threads:
            //
            //    O2Thread.mtaThread(() =>
            //                           {
            //                               var activeThread = CommandBase.Debugger.Processes.Active.Threads.Active;
            //                               var filename = activeThread.BottomFrame.SourcePosition.Path;
            //                               var line = activeThread.BottomFrame.SourcePosition.Line;
            //                               O2Messages.raiseO2MDbgBreakEvent(filename, line);
            //                           });
            //
            //    if (DI.o2MDbg.LogBreakpointEvent)
            //        DI.log.info("*** BREAKPOINT >>  " + O2MDbgBreakPoint.getActiveFrameFunctionName(e));

            // Can't resolve the active thread here, since
            // CommandBase.Debugger.Processes.Active.Threads.Active is only set after this handler runs.
            e.Continue = DI.o2MDbg.AutoContinueOnBreakPointEvent;
        }
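For context, a hedged sketch of how such a handler is typically attached; the corProcess variable and the exact event name are inferred from the handler's signature, not confirmed by this snippet:

// Assumed wiring (verify against the actual Debugging.CorDebug.CorProcess API):
// corProcess.OnBreakpoint += CorProcess_OnBreakpoint;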