public override void Dump()
{
    Debugging.CurrentPrice(Instrument);
    Debugging.Dump(Instrument, range: new Range(-100, 1), indicators: new[] { MA0, MA1 });
    Debugging.Dump(new SnR(Instrument));
}
//public static void main( string[] args ) throws Exception { // Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); // QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer ); // Query query = parser.parse( "a x:b" ); // FieldQuery fieldQuery = new FieldQuery( query, true, false ); // Directory dir = new RAMDirectory(); // IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)); // Document doc = new Document(); // IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED); // ft.setStoreTermVectors(true); // ft.setStoreTermVectorOffsets(true); // ft.setStoreTermVectorPositions(true); // doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) ); // doc.add( new Field( "f", ft, "b a b a f" ) ); // writer.addDocument( doc ); // writer.close(); // IndexReader reader = IndexReader.open(dir1); // new FieldTermStack( reader, 0, "f", fieldQuery ); // reader.close(); //} /// <summary> /// a constructor. /// </summary> /// <param name="reader"><see cref="IndexReader"/> of the index</param> /// <param name="docId">document id to be highlighted</param> /// <param name="fieldName">field of the document to be highlighted</param> /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param> /// <exception cref="IOException">If there is a low-level I/O error</exception> public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery) { this.fieldName = fieldName; ISet <string> termSet = fieldQuery.GetTermSet(fieldName); // just return to make null snippet if un-matched fieldName specified when fieldMatch == true if (termSet == null) { return; } Fields vectors = reader.GetTermVectors(docId); if (vectors == null) { // null snippet return; } Terms vector = vectors.GetTerms(fieldName); if (vector == null) { // null snippet return; } CharsRef spare = new CharsRef(); TermsEnum termsEnum = vector.GetEnumerator(); DocsAndPositionsEnum dpEnum = null; BytesRef text; int numDocs = reader.MaxDoc; while (termsEnum.MoveNext()) { text = termsEnum.Term; UnicodeUtil.UTF8toUTF16(text, spare); string term = spare.ToString(); if (!termSet.Contains(term)) { continue; } dpEnum = termsEnum.DocsAndPositions(null, dpEnum); if (dpEnum == null) { // null snippet return; } dpEnum.NextDoc(); // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0); int freq = dpEnum.Freq; for (int i = 0; i < freq; i++) { int pos = dpEnum.NextPosition(); if (dpEnum.StartOffset < 0) { return; // no offsets, null snippet } termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight)); } } // sort by position CollectionUtil.TimSort(termList); // now look for dups at the same position, linking them together int currentPos = -1; TermInfo previous = null; TermInfo first = null; for (int i = 0; i < termList.Count;) { TermInfo current = termList[i]; if (current.Position == currentPos) { if (Debugging.AssertsEnabled) { Debugging.Assert(previous != null); } previous.SetNext(current); previous = current; //iterator.Remove(); // LUCENENET NOTE: Remove, but don't advance the i position (since removing will advance to the next item) termList.RemoveAt(i); } else { if (previous != null) { previous.SetNext(first); } previous = first = current; currentPos = current.Position; // LUCENENET NOTE: Only increment the position if we don't do a delete. 
i++; } } if (previous != null) { previous.SetNext(first); } }
internal override void AddTerm(int termID) { if (Debugging.AssertsEnabled) { Debugging.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start")); } FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray; if (Debugging.AssertsEnabled) { Debugging.Assert(!hasFreq || postings.termFreqs[termID] > 0); } if (!hasFreq) { if (Debugging.AssertsEnabled) { Debugging.Assert(postings.termFreqs == null); } if (docState.docID != postings.lastDocIDs[termID]) { if (Debugging.AssertsEnabled) { Debugging.Assert(docState.docID > postings.lastDocIDs[termID]); } termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]); postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID]; postings.lastDocIDs[termID] = docState.docID; fieldState.UniqueTermCount++; } } else if (docState.docID != postings.lastDocIDs[termID]) { if (Debugging.AssertsEnabled) { Debugging.Assert(docState.docID > postings.lastDocIDs[termID], "id: {0} postings ID: {1} termID: {2}", docState.docID, postings.lastDocIDs[termID], termID); } // Term not yet seen in the current doc but previously // seen in other doc(s) since the last flush // Now that we know doc freq for previous doc, // write it & lastDocCode if (1 == postings.termFreqs[termID]) { termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID] | 1); } else { termsHashPerField.WriteVInt32(0, postings.lastDocCodes[termID]); termsHashPerField.WriteVInt32(0, postings.termFreqs[termID]); } postings.termFreqs[termID] = 1; fieldState.MaxTermFrequency = Math.Max(1, fieldState.MaxTermFrequency); postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1; postings.lastDocIDs[termID] = docState.docID; if (hasProx) { WriteProx(termID, fieldState.Position); if (hasOffsets) { postings.lastOffsets[termID] = 0; WriteOffsets(termID, fieldState.Offset); } } else { if (Debugging.AssertsEnabled) { Debugging.Assert(!hasOffsets); } } fieldState.UniqueTermCount++; } else { fieldState.MaxTermFrequency = Math.Max(fieldState.MaxTermFrequency, ++postings.termFreqs[termID]); if (hasProx) { WriteProx(termID, fieldState.Position - postings.lastPositions[termID]); } if (hasOffsets) { WriteOffsets(termID, fieldState.Offset); } } }
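// Illustration only (hypothetical helper, not part of FreqProxTermsWriterPerField): the doc/freq
// encoding that AddTerm writes into stream 0. The doc delta is shifted left one bit; the low bit
// is set when the previous doc's term frequency was exactly 1, so the separate freq VInt can be
// omitted. Shown as a pure function returning the VInt values that would be written.
internal static int[] EncodeDocAndFreq(int docDelta, int termFreq)
{
    return termFreq == 1
        ? new[] { (docDelta << 1) | 1 }        // low bit set: freq == 1, nothing else follows
        : new[] { docDelta << 1, termFreq };   // low bit clear: an explicit freq VInt follows
}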
private void BtnCheckSeed_Click(object sender, EventArgs e)
{
    if (!chkShowObtainable.Checked && !chkShowUnobtainable.Checked) { LBResult.Items.Clear(); return; }
    var logicCopy = Utility.CloneTrackerInstance(CheckerInstance);
    foreach (var entry in logicCopy.Logic)
    {
        entry.Available = false;
        entry.Checked = false;
        entry.Aquired = false;
        if (entry.SpoilerRandom > -1)
        {
            // Make the item's randomized item its spoiler item, just for consistency's sake
            entry.RandomizedItem = entry.SpoilerRandom;
        }
        else if (entry.RandomizedItem > -1)
        {
            // If the item doesn't have spoiler data but does have a randomized item, set its spoiler data to the randomized item
            entry.SpoilerRandom = entry.RandomizedItem;
        }
        else if (entry.Unrandomized(2))
        {
            // If the item has neither spoiler data nor a randomized item and is unrandomized (manual), set its spoiler item to itself
            entry.SpoilerRandom = entry.ID;
            entry.RandomizedItem = entry.ID;
        }
    }
    LBResult.Items.Clear();
    List<int> Ignored = new List<int>();
    foreach (var item in LBIgnoredChecks.Items) { Ignored.Add((item as LogicObjects.ListItem).PathID); }
    var GameClearEntry = logicCopy.Logic.Find(x => x.DictionaryName == "MMRTGameClear");
    if (GameClearEntry != null)
    {
        GameClearEntry.ItemName = (LogicObjects.MainTrackerInstance.IsMM()) ? "Defeat Majora" : "Beat the Game";
    }
    else if (LogicObjects.MainTrackerInstance.IsMM())
    {
        int GameClearID = PlaythroughGenerator.GetGameClearEntry(logicCopy.Logic, LogicObjects.MainTrackerInstance.IsEntranceRando());
        logicCopy.Logic[GameClearID].ItemName = "Defeat Majora";
    }
    CheckSeed(logicCopy, true, Ignored);
    List<string> obtainable = new List<string>();
    List<string> unobtainable = new List<string>();
    foreach (var item in LBNeededItems.Items)
    {
        bool Spoil = false;
        var ListItem = item as LogicObjects.ListItem;
        var iteminLogic = logicCopy.Logic[ListItem.PathID];
        string ItemName = iteminLogic.ItemName ?? iteminLogic.DictionaryName;
        var ItemsLocation = iteminLogic.GetItemsNewLocation(logicCopy.Logic);
        string LocationFoundAt = (ItemsLocation != null) ? ItemsLocation.LocationName ?? ItemsLocation.DictionaryName : "";
        string DisplayName = (Spoil) ? ItemName + ": " + LocationFoundAt : ItemName;
        Debugging.Log(logicCopy.Logic[ListItem.PathID].DictionaryName + " " + logicCopy.Logic[ListItem.PathID].Aquired);
        if (logicCopy.Logic[ListItem.PathID].Aquired) { obtainable.Add(DisplayName); }
        else { unobtainable.Add(DisplayName); }
    }
    if (unobtainable.Count > 0 && chkShowUnobtainable.Checked)
    {
        LBResult.Items.Add("Unobtainable ==============================");
        foreach (var i in unobtainable) { LBResult.Items.Add(i); }
    }
    if (obtainable.Count > 0 && chkShowObtainable.Checked)
    {
        LBResult.Items.Add("Obtainable ==============================");
        foreach (var i in obtainable) { LBResult.Items.Add(i); }
    }
}
/// <summary> /// Safe (but, slowish) default method to write every /// vector field in the document. /// </summary> protected void AddAllDocVectors(Fields vectors, MergeState mergeState) { if (vectors == null) { StartDocument(0); FinishDocument(); return; } int numFields = vectors.Count; if (numFields == -1) { // count manually! TODO: Maybe enforce that Fields.size() returns something valid? numFields = 0; //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();) foreach (string it in vectors) { numFields++; } } StartDocument(numFields); string lastFieldName = null; TermsEnum termsEnum = null; DocsAndPositionsEnum docsAndPositionsEnum = null; int fieldCount = 0; foreach (string fieldName in vectors) { fieldCount++; FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName); if (Debugging.AssertsEnabled) { Debugging.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName={0} fieldName={1}", lastFieldName, fieldName); } lastFieldName = fieldName; Terms terms = vectors.GetTerms(fieldName); if (terms == null) { // FieldsEnum shouldn't lie... continue; } bool hasPositions = terms.HasPositions; bool hasOffsets = terms.HasOffsets; bool hasPayloads = terms.HasPayloads; if (Debugging.AssertsEnabled) { Debugging.Assert(!hasPayloads || hasPositions); } int numTerms = (int)terms.Count; if (numTerms == -1) { // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function numTerms = 0; termsEnum = terms.GetEnumerator(termsEnum); while (termsEnum.MoveNext()) { numTerms++; } } StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads); termsEnum = terms.GetEnumerator(termsEnum); int termCount = 0; while (termsEnum.MoveNext()) { termCount++; int freq = (int)termsEnum.TotalTermFreq; StartTerm(termsEnum.Term, freq); if (hasPositions || hasOffsets) { docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum); if (Debugging.AssertsEnabled) { Debugging.Assert(docsAndPositionsEnum != null); } int docID = docsAndPositionsEnum.NextDoc(); if (Debugging.AssertsEnabled) { Debugging.Assert(docID != DocIdSetIterator.NO_MORE_DOCS); Debugging.Assert(docsAndPositionsEnum.Freq == freq); } for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = docsAndPositionsEnum.NextPosition(); int startOffset = docsAndPositionsEnum.StartOffset; int endOffset = docsAndPositionsEnum.EndOffset; BytesRef payload = docsAndPositionsEnum.GetPayload(); if (Debugging.AssertsEnabled) { Debugging.Assert(!hasPositions || pos >= 0); } AddPosition(pos, startOffset, endOffset, payload); } } FinishTerm(); } if (Debugging.AssertsEnabled) { Debugging.Assert(termCount == numTerms); } FinishField(); } if (Debugging.AssertsEnabled) { Debugging.Assert(fieldCount == numFields); } FinishDocument(); }
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) { string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si); int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si); int size = si.DocCount; bool success = false; try { if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION); tvx = d.OpenInput(idxName, context); format = CheckValidFormat(tvx); string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); tvd = d.OpenInput(fn, context); int tvdFormat = CheckValidFormat(tvd); fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); tvf = d.OpenInput(fn, context); int tvfFormat = CheckValidFormat(tvf); if (Debugging.AssertsEnabled) { Debugging.Assert(format == tvdFormat); Debugging.Assert(format == tvfFormat); } numTotalDocs = (int)(tvx.Length >> 4); if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = numTotalDocs; if (Debugging.AssertsEnabled) { Debugging.Assert(size == 0 || numTotalDocs == size); } } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs if (Debugging.AssertsEnabled) { Debugging.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs={0} size={1} docStoreOffset={2}", numTotalDocs, size, docStoreOffset); } } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // keep our original exception catch (Exception t) when(t.IsThrowable()) { } } } }
internal Cell? nextCell; //see getLeafDocs

/// <remarks>This is the primary algorithm; recursive. Returns null if finds none.</remarks>
/// <exception cref="IOException"></exception>
internal SmallDocSet? Visit(Cell cell, IBits acceptContains)
{
    if (m_termsEnum is null) //signals all done
    {
        return null;
    }

    ContainsPrefixTreeFilter outerInstance = (ContainsPrefixTreeFilter)base.m_filter;

    //Leaf docs match all query shape
    SmallDocSet? leafDocs = GetLeafDocs(cell, acceptContains);

    // Get the AND of all child results (into combinedSubResults)
    SmallDocSet? combinedSubResults = null;
    // Optimization: use null subCellsFilter when we know cell is within the query shape.
    IShape? subCellsFilter = outerInstance.m_queryShape;
    if (cell.Level != 0 && ((cell.ShapeRel == SpatialRelation.None || cell.ShapeRel == SpatialRelation.Within)))
    {
        subCellsFilter = null;
        if (Debugging.AssertsEnabled) { Debugging.Assert(cell.Shape.Relate(outerInstance.m_queryShape) == SpatialRelation.Within); }
    }
    ICollection<Cell> subCells = cell.GetSubCells(subCellsFilter);
    foreach (Cell subCell in subCells)
    {
        if (!SeekExact(subCell))
        {
            combinedSubResults = null;
        }
        else if (subCell.Level == outerInstance.m_detailLevel)
        {
            combinedSubResults = GetDocs(subCell, acceptContains);
        }
        else if (!outerInstance.m_multiOverlappingIndexedShapes && subCell.ShapeRel == SpatialRelation.Within)
        {
            combinedSubResults = GetLeafDocs(subCell, acceptContains); //recursion
        }
        else
        {
            combinedSubResults = Visit(subCell, acceptContains);
        }

        if (combinedSubResults is null)
        {
            break;
        }
        acceptContains = combinedSubResults; //has the 'AND' effect on next iteration
    }

    // Result: OR the leaf docs with AND of all child results
    if (combinedSubResults != null)
    {
        if (leafDocs is null)
        {
            return combinedSubResults;
        }
        return leafDocs.Union(combinedSubResults); //union is 'or'
    }
    return leafDocs;
}
public override SeekStatus SeekCeil(BytesRef term) { queue.Clear(); numTop = 0; lastSeekExact = false; bool seekOpt = false; if (lastSeek != null && termComp.Compare(lastSeek, term) <= 0) { seekOpt = true; } lastSeekScratch.CopyBytes(term); lastSeek = lastSeekScratch; for (int i = 0; i < numSubs; i++) { SeekStatus status; // LUCENE-2130: if we had just seek'd already, prior // to this seek, and the new seek term is after the // previous one, don't try to re-seek this sub if its // current term is already beyond this new seek term. // Doing so is a waste because this sub will simply // seek to the same spot. if (seekOpt) { BytesRef curTerm = currentSubs[i].Current; if (curTerm != null) { int cmp = termComp.Compare(term, curTerm); if (cmp == 0) { status = SeekStatus.FOUND; } else if (cmp < 0) { status = SeekStatus.NOT_FOUND; } else { status = currentSubs[i].Terms.SeekCeil(term); } } else { status = SeekStatus.END; } } else { status = currentSubs[i].Terms.SeekCeil(term); } if (status == SeekStatus.FOUND) { top[numTop++] = currentSubs[i]; current = currentSubs[i].Current = currentSubs[i].Terms.Term; } else { if (status == SeekStatus.NOT_FOUND) { currentSubs[i].Current = currentSubs[i].Terms.Term; if (Debugging.AssertsEnabled) Debugging.Assert(currentSubs[i].Current != null); queue.Add(currentSubs[i]); } else { // enum exhausted currentSubs[i].Current = null; } } } if (numTop > 0) { // at least one sub had exact match to the requested term return SeekStatus.FOUND; } else if (queue.Count > 0) { // no sub had exact match, but at least one sub found // a term after the requested term -- advance to that // next term: PullTop(); return SeekStatus.NOT_FOUND; } else { return SeekStatus.END; } }
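// Usage sketch (illustrative, not from this file): how SeekCeil is typically consumed by a caller.
// Assumes Lucene.Net.Index.MultiFields, the GetEnumerator()/SeekStatus members used elsewhere in
// this codebase, and an already-open IndexReader; System.Console is used for output.
internal static void PrintCeilingTerm(IndexReader reader, string field, string text)
{
    Terms terms = MultiFields.GetTerms(reader, field);
    if (terms is null) return;
    TermsEnum te = terms.GetEnumerator();
    if (te.SeekCeil(new BytesRef(text)) != TermsEnum.SeekStatus.END)
    {
        // FOUND: positioned exactly on the requested term;
        // NOT_FOUND: positioned on the next greater term.
        Console.WriteLine(te.Term.Utf8ToString());
    }
}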
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { // Can only reuse if incoming enum is also a MultiDocsEnum // ... and was previously created w/ this MultiTermsEnum: if (reuse is null || !(reuse is MultiDocsEnum docsEnum) || !docsEnum.CanReuse(this)) docsEnum = new MultiDocsEnum(this, subs.Length); int upto = 0; for (int i = 0; i < numTop; i++) { TermsEnumWithSlice entry = top[i]; IBits b; if (liveDocs is MultiBits multiLiveDocs) { // optimize for common case: requested skip docs is a // congruent sub-slice of MultiBits: in this case, we // just pull the liveDocs from the sub reader, rather // than making the inefficient // Slice(Multi(sub-readers)): MultiBits.SubResult sub = multiLiveDocs.GetMatchingSub(entry.SubSlice); if (sub.Matches) { b = sub.Result; } else { // custom case: requested skip docs is foreign: // must slice it on every access b = new BitsSlice(liveDocs, entry.SubSlice); } } else if (liveDocs != null) { b = new BitsSlice(liveDocs, entry.SubSlice); } else { // no deletions b = null; } if (Debugging.AssertsEnabled) Debugging.Assert(entry.Index < docsEnum.subDocsEnum.Length, "{0} vs {1}; {2}", entry.Index, docsEnum.subDocsEnum.Length, subs.Length); DocsEnum subDocsEnum = entry.Terms.Docs(b, docsEnum.subDocsEnum[entry.Index], flags); if (subDocsEnum != null) { docsEnum.subDocsEnum[entry.Index] = subDocsEnum; subDocs[upto].DocsEnum = subDocsEnum; subDocs[upto].Slice = entry.SubSlice; upto++; } else { // should this be an error? if (Debugging.AssertsEnabled) Debugging.Assert(false, "One of our subs cannot provide a docsenum"); } } if (upto == 0) { return null; } else { return docsEnum.Reset(subDocs, upto); } }
internal long currentEntryIndex; // also indicates how many entries in the index are valid. /// <summary> /// Construct an Elias-Fano encoder. /// After construction, call <see cref="EncodeNext(long)"/> <paramref name="numValues"/> times to encode /// a non decreasing sequence of non negative numbers. /// </summary> /// <param name="numValues"> The number of values that is to be encoded. </param> /// <param name="upperBound"> At least the highest value that will be encoded. /// For space efficiency this should not exceed the power of two that equals /// or is the first higher than the actual maximum. /// <para/>When <c>numValues >= (upperBound/3)</c> /// a <see cref="FixedBitSet"/> will take less space. </param> /// <param name="indexInterval"> The number of high zero bits for which a single index entry is built. /// The index will have at most <c>2 * numValues / indexInterval</c> entries /// and each index entry will use at most <c>Ceil(Log2(3 * numValues))</c> bits, /// see <see cref="EliasFanoEncoder"/>. </param> /// <exception cref="ArgumentException"> when: /// <list type="bullet"> /// <item><description><paramref name="numValues"/> is negative, or</description></item> /// <item><description><paramref name="numValues"/> is non negative and <paramref name="upperBound"/> is negative, or</description></item> /// <item><description>the low bits do not fit in a <c>long[]</c>: /// <c>(L * numValues / 64) > System.Int32.MaxValue</c>, or</description></item> /// <item><description>the high bits do not fit in a <c>long[]</c>: /// <c>(2 * numValues / 64) > System.Int32.MaxValue</c>, or</description></item> /// <item><description><c>indexInterval < 2</c>,</description></item> /// <item><description>the index bits do not fit in a <c>long[]</c>: /// <c>(numValues / indexInterval * ceil(2log(3 * numValues)) / 64) > System.Int32.MaxValue</c>.</description></item> /// </list> </exception> public EliasFanoEncoder(long numValues, long upperBound, long indexInterval) { if (numValues < 0L) { throw new ArgumentException("numValues should not be negative: " + numValues); } this.numValues = numValues; if ((numValues > 0L) && (upperBound < 0L)) { throw new ArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0"); } this.upperBound = numValues > 0 ? upperBound : -1L; // if there is no value, -1 is the best upper bound int nLowBits = 0; if (this.numValues > 0) // nLowBits = max(0; floor(2log(upperBound/numValues))) { long lowBitsFac = this.upperBound / this.numValues; if (lowBitsFac > 0) { nLowBits = 63 - lowBitsFac.LeadingZeroCount(); // see Long.numberOfLeadingZeros javadocs } } this.numLowBits = nLowBits; this.lowerBitsMask = (long)(unchecked ((ulong)long.MaxValue) >> (sizeof(long) * 8 - 1 - this.numLowBits)); long numLongsForLowBits = NumInt64sForBits(numValues * numLowBits); if (numLongsForLowBits > int.MaxValue) { throw new ArgumentException("numLongsForLowBits too large to index a long array: " + numLongsForLowBits); } this.lowerLongs = new long[(int)numLongsForLowBits]; long numHighBitsClear = (long)((ulong)((this.upperBound > 0) ? 
this.upperBound : 0) >> this.numLowBits); if (Debugging.AssertsEnabled) { Debugging.Assert(numHighBitsClear <= (2 * this.numValues)); } long numHighBitsSet = this.numValues; long numLongsForHighBits = NumInt64sForBits(numHighBitsClear + numHighBitsSet); if (numLongsForHighBits > int.MaxValue) { throw new ArgumentException("numLongsForHighBits too large to index a long array: " + numLongsForHighBits); } this.upperLongs = new long[(int)numLongsForHighBits]; if (indexInterval < 2) { throw new ArgumentException("indexInterval should at least 2: " + indexInterval); } // For the index: long maxHighValue = (long)((ulong)upperBound >> this.numLowBits); long nIndexEntries = maxHighValue / indexInterval; // no zero value index entry this.numIndexEntries = (nIndexEntries >= 0) ? nIndexEntries : 0; long maxIndexEntry = maxHighValue + numValues - 1; // clear upper bits, set upper bits, start at zero this.nIndexEntryBits = (maxIndexEntry <= 0) ? 0 : (64 - maxIndexEntry.LeadingZeroCount()); long numLongsForIndexBits = NumInt64sForBits(numIndexEntries * nIndexEntryBits); if (numLongsForIndexBits > int.MaxValue) { throw new ArgumentException("numLongsForIndexBits too large to index a long array: " + numLongsForIndexBits); } this.upperZeroBitPositionIndex = new long[(int)numLongsForIndexBits]; this.currentEntryIndex = 0; this.indexInterval = indexInterval; }
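// Minimal round-trip sketch for the encoder above. GetDecoder(), NextValue() and
// EliasFanoDecoder.NO_MORE_VALUES are taken from the companion decoder API and should be treated
// as assumptions here; namespaces Lucene.Net.Util.Packed and System are assumed.
internal static void EliasFanoRoundTripExample()
{
    long[] values = { 2, 3, 5, 7, 11, 13 };                     // non decreasing, non negative
    var encoder = new EliasFanoEncoder(values.Length, 13, 256); // numValues, upperBound, indexInterval
    foreach (long v in values)
    {
        encoder.EncodeNext(v);                                  // must be called exactly numValues times
    }
    EliasFanoDecoder decoder = encoder.GetDecoder();
    for (long v = decoder.NextValue(); v != EliasFanoDecoder.NO_MORE_VALUES; v = decoder.NextValue())
    {
        Console.WriteLine(v);
    }
}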
public override bool SeekExact(BytesRef term) { queue.Clear(); numTop = 0; bool seekOpt = false; if (lastSeek != null && termComp.Compare(lastSeek, term) <= 0) { seekOpt = true; } lastSeek = null; lastSeekExact = true; for (int i = 0; i < numSubs; i++) { bool status; // LUCENE-2130: if we had just seek'd already, prior // to this seek, and the new seek term is after the // previous one, don't try to re-seek this sub if its // current term is already beyond this new seek term. // Doing so is a waste because this sub will simply // seek to the same spot. if (seekOpt) { BytesRef curTerm = currentSubs[i].Current; if (curTerm != null) { int cmp = termComp.Compare(term, curTerm); if (cmp == 0) { status = true; } else if (cmp < 0) { status = false; } else { status = currentSubs[i].Terms.SeekExact(term); } } else { status = false; } } else { status = currentSubs[i].Terms.SeekExact(term); } if (status) { top[numTop++] = currentSubs[i]; current = currentSubs[i].Current = currentSubs[i].Terms.Term; if (Debugging.AssertsEnabled) Debugging.Assert(term.Equals(currentSubs[i].Current)); } } // if at least one sub had exact match to the requested // term then we found match return numTop > 0; }
internal void FinishDocument() { if (Debugging.AssertsEnabled) { Debugging.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start")); } int numPostings = termsHashPerField.bytesHash.Count; BytesRef flushTerm = termsWriter.flushTerm; if (Debugging.AssertsEnabled) { Debugging.Assert(numPostings >= 0); } if (numPostings > maxNumPostings) { maxNumPostings = numPostings; } // this is called once, after inverting all occurrences // of a given field in the doc. At this point we flush // our hash into the DocWriter. if (Debugging.AssertsEnabled) { Debugging.Assert(termsWriter.VectorFieldsInOrder(fieldInfo)); } TermVectorsPostingsArray postings = (TermVectorsPostingsArray)termsHashPerField.postingsArray; TermVectorsWriter tv = termsWriter.writer; int[] termIDs = termsHashPerField.SortPostings(tv.Comparer); tv.StartField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads); ByteSliceReader posReader = doVectorPositions ? termsWriter.vectorSliceReaderPos : null; ByteSliceReader offReader = doVectorOffsets ? termsWriter.vectorSliceReaderOff : null; ByteBlockPool termBytePool = termsHashPerField.termBytePool; for (int j = 0; j < numPostings; j++) { int termID = termIDs[j]; int freq = postings.freqs[termID]; // Get BytesRef termBytePool.SetBytesRef(flushTerm, postings.textStarts[termID]); tv.StartTerm(flushTerm, freq); if (doVectorPositions || doVectorOffsets) { if (posReader != null) { termsHashPerField.InitReader(posReader, termID, 0); } if (offReader != null) { termsHashPerField.InitReader(offReader, termID, 1); } tv.AddProx(freq, posReader, offReader); } tv.FinishTerm(); } tv.FinishField(); termsHashPerField.Reset(); fieldInfo.SetStoreTermVectors(); }
/// <summary> /// Sole constructor. </summary> public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode) { this.compressionMode = compressionMode; string segment = si.Name; bool success = false; fieldInfos = fn; numDocs = si.DocCount; ChecksumIndexInput indexStream = null; try { // Load the index into memory string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION); indexStream = d.OpenChecksumInput(indexStreamFN, context); string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX; version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT); if (Debugging.AssertsEnabled) { Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer()); } indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM) { indexStream.ReadVInt64(); // the end of the data file CodecUtil.CheckFooter(indexStream); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(indexStream); #pragma warning restore 612, 618 } indexStream.Dispose(); indexStream = null; // Open the data file and read metadata string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION); vectorsStream = d.OpenInput(vectorsStreamFN, context); string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT; int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT); if (version != version2) { throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2); } if (Debugging.AssertsEnabled) { Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer()); } packedIntsVersion = vectorsStream.ReadVInt32(); chunkSize = vectorsStream.ReadVInt32(); decompressor = compressionMode.NewDecompressor(); this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0); success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(this, indexStream); } } }
public override Fields Get(int doc) { EnsureOpen(); // seek to the right place { long startPointer = indexReader.GetStartPointer(doc); vectorsStream.Seek(startPointer); } // decode // - docBase: first doc ID of the chunk // - chunkDocs: number of docs of the chunk int docBase = vectorsStream.ReadVInt32(); int chunkDocs = vectorsStream.ReadVInt32(); if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) { throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")"); } int skip; // number of fields to skip int numFields; // number of fields of the document we're looking for int totalFields; // total number of fields of the chunk (sum for all docs) if (chunkDocs == 1) { skip = 0; numFields = totalFields = vectorsStream.ReadVInt32(); } else { reader.Reset(vectorsStream, chunkDocs); int sum = 0; for (int i = docBase; i < doc; ++i) { sum += (int)reader.Next(); } skip = sum; numFields = (int)reader.Next(); sum += numFields; for (int i = doc + 1; i < docBase + chunkDocs; ++i) { sum += (int)reader.Next(); } totalFields = sum; } if (numFields == 0) { // no vectors return(null); } // read field numbers that have term vectors int[] fieldNums; { int token = vectorsStream.ReadByte() & 0xFF; if (Debugging.AssertsEnabled) { Debugging.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0 } int bitsPerFieldNum = token & 0x1F; int totalDistinctFields = (int)((uint)token >> 5); if (totalDistinctFields == 0x07) { totalDistinctFields += vectorsStream.ReadVInt32(); } ++totalDistinctFields; PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1); fieldNums = new int[totalDistinctFields]; for (int i = 0; i < totalDistinctFields; ++i) { fieldNums[i] = (int)it.Next(); } } // read field numbers and flags int[] fieldNumOffs = new int[numFields]; PackedInt32s.Reader flags; { int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1); PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff); switch (vectorsStream.ReadVInt32()) { case 0: PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS); PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT); for (int i = 0; i < totalFields; ++i) { int fieldNumOff = (int)allFieldNumOffs.Get(i); if (Debugging.AssertsEnabled) { Debugging.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length); } int fgs = (int)fieldFlags.Get(fieldNumOff); f.Set(i, fgs); } flags = f; break; case 1: flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS); break; default: throw new Exception(); } for (int i = 0; i < numFields; ++i) { fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i); } } // number of terms per field for all fields PackedInt32s.Reader numTerms; int totalTerms; { int bitsRequired = vectorsStream.ReadVInt32(); numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired); int sum = 0; for (int i = 0; i < totalFields; ++i) { sum += (int)numTerms.Get(i); } 
totalTerms = sum; } // term lengths int docOff = 0, docLen = 0, totalLen; int[] fieldLengths = new int[numFields]; int[][] prefixLengths = new int[numFields][]; int[][] suffixLengths = new int[numFields][]; { reader.Reset(vectorsStream, totalTerms); // skip int toSkip = 0; for (int i = 0; i < skip; ++i) { toSkip += (int)numTerms.Get(i); } reader.Skip(toSkip); // read prefix lengths for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); int[] fieldPrefixLengths = new int[termCount]; prefixLengths[i] = fieldPrefixLengths; for (int j = 0; j < termCount;) { Int64sRef next = reader.Next(termCount - j); for (int k = 0; k < next.Length; ++k) { fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k]; } } } reader.Skip(totalTerms - reader.Ord); reader.Reset(vectorsStream, totalTerms); // skip toSkip = 0; for (int i = 0; i < skip; ++i) { for (int j = 0; j < numTerms.Get(i); ++j) { docOff += (int)reader.Next(); } } for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); int[] fieldSuffixLengths = new int[termCount]; suffixLengths[i] = fieldSuffixLengths; for (int j = 0; j < termCount;) { Int64sRef next = reader.Next(termCount - j); for (int k = 0; k < next.Length; ++k) { fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k]; } } fieldLengths[i] = Sum(suffixLengths[i]); docLen += fieldLengths[i]; } totalLen = docOff + docLen; for (int i = skip + numFields; i < totalFields; ++i) { for (int j = 0; j < numTerms.Get(i); ++j) { totalLen += (int)reader.Next(); } } } // term freqs int[] termFreqs = new int[totalTerms]; { reader.Reset(vectorsStream, totalTerms); for (int i = 0; i < totalTerms;) { Int64sRef next = reader.Next(totalTerms - i); for (int k = 0; k < next.Length; ++k) { termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k]; } } } // total number of positions, offsets and payloads int totalPositions = 0, totalOffsets = 0, totalPayloads = 0; for (int i = 0, termIndex = 0; i < totalFields; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex++]; if ((f & CompressingTermVectorsWriter.POSITIONS) != 0) { totalPositions += freq; } if ((f & CompressingTermVectorsWriter.OFFSETS) != 0) { totalOffsets += freq; } if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { totalPayloads += freq; } } if (Debugging.AssertsEnabled) { Debugging.Assert(i != totalFields - 1 || termIndex == totalTerms, () => termIndex + " " + totalTerms); } } int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs); int[][] positions, startOffsets, lengths; if (totalPositions > 0) { positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex); } else { positions = new int[numFields][]; } if (totalOffsets > 0) { // average number of chars per term float[] charsPerTerm = new float[fieldNums.Length]; for (int i = 0; i < charsPerTerm.Length; ++i) { charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32()); } startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex); lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex); for (int i = 0; i < numFields; ++i) { int[] fStartOffsets = startOffsets[i]; int[] fPositions = positions[i]; // patch offsets from positions if (fStartOffsets != null && fPositions != null) { 
float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]]; for (int j = 0; j < startOffsets[i].Length; ++j) { fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]); } } if (fStartOffsets != null) { int[] fPrefixLengths = prefixLengths[i]; int[] fSuffixLengths = suffixLengths[i]; int[] fLengths = lengths[i]; for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j) { // delta-decode start offsets and patch lengths using term lengths int termLength = fPrefixLengths[j] + fSuffixLengths[j]; lengths[i][positionIndex[i][j]] += termLength; for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) { fStartOffsets[k] += fStartOffsets[k - 1]; fLengths[k] += termLength; } } } } } else { startOffsets = lengths = new int[numFields][]; } if (totalPositions > 0) { // delta-decode positions for (int i = 0; i < numFields; ++i) { int[] fPositions = positions[i]; int[] fpositionIndex = positionIndex[i]; if (fPositions != null) { for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j) { // delta-decode start offsets for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) { fPositions[k] += fPositions[k - 1]; } } } } } // payload lengths int[][] payloadIndex = new int[numFields][]; int totalPayloadLength = 0; int payloadOff = 0; int payloadLen = 0; if (totalPayloads > 0) { reader.Reset(vectorsStream, totalPayloads); // skip int termIndex = 0; for (int i = 0; i < skip; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { int l = (int)reader.Next(); payloadOff += l; } } } termIndex += termCount; } totalPayloadLength = payloadOff; // read doc payload lengths for (int i = 0; i < numFields; ++i) { int f = (int)flags.Get(skip + i); int termCount = (int)numTerms.Get(skip + i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { int totalFreq = positionIndex[i][termCount]; payloadIndex[i] = new int[totalFreq + 1]; int posIdx = 0; payloadIndex[i][posIdx] = payloadLen; for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { int payloadLength = (int)reader.Next(); payloadLen += payloadLength; payloadIndex[i][posIdx + 1] = payloadLen; ++posIdx; } } if (Debugging.AssertsEnabled) { Debugging.Assert(posIdx == totalFreq); } } termIndex += termCount; } totalPayloadLength += payloadLen; for (int i = skip + numFields; i < totalFields; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { totalPayloadLength += (int)reader.Next(); } } } termIndex += termCount; } if (Debugging.AssertsEnabled) { Debugging.Assert(termIndex == totalTerms, () => termIndex + " " + totalTerms); } } // decompress data BytesRef suffixBytes = new BytesRef(); decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes); suffixBytes.Length = docLen; BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen); int[] FieldFlags = new int[numFields]; for (int i = 0; i < numFields; ++i) { FieldFlags[i] = (int)flags.Get(skip + i); } int[] fieldNumTerms = new int[numFields]; for (int i = 0; i < numFields; ++i) { fieldNumTerms[i] = (int)numTerms.Get(skip + i); } int[][] fieldTermFreqs = new int[numFields][]; 
{ int termIdx = 0; for (int i = 0; i < skip; ++i) { termIdx += (int)numTerms.Get(i); } for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); fieldTermFreqs[i] = new int[termCount]; for (int j = 0; j < termCount; ++j) { fieldTermFreqs[i][j] = termFreqs[termIdx++]; } } } if (Debugging.AssertsEnabled) { Debugging.Assert(Sum(fieldLengths) == docLen, () => Sum(fieldLengths) + " != " + docLen); } return(new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes)); }
internal readonly bool isSegmentPrivate; // set to true iff this frozen packet represents // a segment private deletes. in that case is should // only have Queries public FrozenBufferedUpdates(BufferedUpdates deletes, bool isSegmentPrivate) { this.isSegmentPrivate = isSegmentPrivate; if (Debugging.AssertsEnabled) { Debugging.Assert(!isSegmentPrivate || deletes.terms.Count == 0, () => "segment private package should only have del queries"); } Term[] termsArray = deletes.terms.Keys.ToArray(/*new Term[deletes.terms.Count]*/); termCount = termsArray.Length; ArrayUtil.TimSort(termsArray); PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); foreach (Term term in termsArray) { builder.Add(term); } terms = builder.Finish(); queries = new Query[deletes.queries.Count]; queryLimits = new int[deletes.queries.Count]; int upto = 0; foreach (KeyValuePair <Query, int?> ent in deletes.queries) { queries[upto] = ent.Key; if (ent.Value.HasValue) { queryLimits[upto] = ent.Value.Value; } else { // LUCENENET NOTE: According to this: http://stackoverflow.com/a/13914344 // we are supposed to throw an exception in this case, rather than // silently fail. throw new NullReferenceException(); } upto++; } // TODO if a Term affects multiple fields, we could keep the updates key'd by Term // so that it maps to all fields it affects, sorted by their docUpto, and traverse // that Term only once, applying the update to all fields that still need to be // updated. IList <NumericDocValuesUpdate> allNumericUpdates = new List <NumericDocValuesUpdate>(); int numericUpdatesSize = 0; foreach (var numericUpdates in deletes.numericUpdates.Values) { foreach (NumericDocValuesUpdate update in numericUpdates.Values) { allNumericUpdates.Add(update); numericUpdatesSize += update.GetSizeInBytes(); } } numericDVUpdates = allNumericUpdates.ToArray(); // TODO if a Term affects multiple fields, we could keep the updates key'd by Term // so that it maps to all fields it affects, sorted by their docUpto, and traverse // that Term only once, applying the update to all fields that still need to be // updated. IList <BinaryDocValuesUpdate> allBinaryUpdates = new List <BinaryDocValuesUpdate>(); int binaryUpdatesSize = 0; foreach (var binaryUpdates in deletes.binaryUpdates.Values) { foreach (BinaryDocValuesUpdate update in binaryUpdates.Values) { allBinaryUpdates.Add(update); binaryUpdatesSize += update.GetSizeInBytes(); } } binaryDVUpdates = allBinaryUpdates.ToArray(); bytesUsed = (int)terms.GetSizeInBytes() + queries.Length * BYTES_PER_DEL_QUERY + numericUpdatesSize + numericDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF + binaryUpdatesSize + binaryDVUpdates.Length * RamUsageEstimator.NUM_BYTES_OBJECT_REF; numTermDeletes = deletes.numTermDeletes; }
public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum // ... and was previously created w/ this MultiTermsEnum: if (reuse is null || !(reuse is MultiDocsAndPositionsEnum docsAndPositionsEnum) || !docsAndPositionsEnum.CanReuse(this)) docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.Length); int upto = 0; for (int i = 0; i < numTop; i++) { TermsEnumWithSlice entry = top[i]; IBits b; if (liveDocs is MultiBits multiLiveDocs) { // Optimize for common case: requested skip docs is a // congruent sub-slice of MultiBits: in this case, we // just pull the liveDocs from the sub reader, rather // than making the inefficient // Slice(Multi(sub-readers)): MultiBits.SubResult sub = multiLiveDocs.GetMatchingSub(top[i].SubSlice); if (sub.Matches) { b = sub.Result; } else { // custom case: requested skip docs is foreign: // must slice it on every access (very // inefficient) b = new BitsSlice(liveDocs, top[i].SubSlice); } } else if (liveDocs != null) { b = new BitsSlice(liveDocs, top[i].SubSlice); } else { // no deletions b = null; } if (Debugging.AssertsEnabled) Debugging.Assert(entry.Index < docsAndPositionsEnum.subDocsAndPositionsEnum.Length, "{0} vs {1}; {2}", entry.Index, docsAndPositionsEnum.subDocsAndPositionsEnum.Length, subs.Length); DocsAndPositionsEnum subPostings = entry.Terms.DocsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.Index], flags); if (subPostings != null) { docsAndPositionsEnum.subDocsAndPositionsEnum[entry.Index] = subPostings; subDocsAndPositions[upto].DocsAndPositionsEnum = subPostings; subDocsAndPositions[upto].Slice = entry.SubSlice; upto++; } else { if (entry.Terms.Docs(b, null, DocsFlags.NONE) != null) { // At least one of our subs does not store // offsets or positions -- we can't correctly // produce a MultiDocsAndPositions enum return null; } } } if (upto == 0) { return null; } else { return docsAndPositionsEnum.Reset(subDocsAndPositions, upto); } }
/// <exception cref="IOException"/> protected override void WriteSkipData(int level, IndexOutput skipBuffer) { // To efficiently store payloads in the posting lists we do not store the length of // every payload. Instead we omit the length for a payload if the previous payload had // the same length. // However, in order to support skipping the payload length at every skip point must be known. // So we use the same length encoding that we use for the posting lists for the skip data as well: // Case 1: current field does not store payloads // SkipDatum --> DocSkip, FreqSkip, ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // DocSkip records the document number before every SkipInterval th document in TermFreqs. // Document numbers are represented as differences from the previous value in the sequence. // Case 2: current field stores payloads // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // PayloadLength --> VInt // In this case DocSkip/2 is the difference between // the current and the previous value. If DocSkip // is odd, then a PayloadLength encoded as VInt follows, // if DocSkip is even, then it is assumed that the // current payload length equals the length at the previous // skip point if (Debugging.AssertsEnabled) { Debugging.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads); } if (curStorePayloads) { int delta = curDoc - lastSkipDoc[level]; if (curPayloadLength == lastSkipPayloadLength[level]) { // the current payload length equals the length at the previous skip point, // so we don't store the length again skipBuffer.WriteVInt32(delta << 1); } else { // the payload length is different from the previous one. We shift the DocSkip, // set the lowest bit and store the current payload length as VInt. skipBuffer.WriteVInt32(delta << 1 | 1); skipBuffer.WriteVInt32(curPayloadLength); lastSkipPayloadLength[level] = curPayloadLength; } } else { // current field does not store payloads skipBuffer.WriteVInt32(curDoc - lastSkipDoc[level]); } if (indexOptions != IndexOptions.DOCS_ONLY) { freqIndex[level].Mark(); freqIndex[level].Write(skipBuffer, false); } docIndex[level].Mark(); docIndex[level].Write(skipBuffer, false); if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { posIndex[level].Mark(); posIndex[level].Write(skipBuffer, false); if (curStorePayloads) { skipBuffer.WriteVInt32((int)(curPayloadPointer - lastSkipPayloadPointer[level])); } } lastSkipDoc[level] = curDoc; lastSkipPayloadPointer[level] = curPayloadPointer; }
public TermsEnumWithSlice(int index, ReaderSlice subSlice)
{
    this.SubSlice = subSlice;
    this.Index = index;
    if (Debugging.AssertsEnabled) Debugging.Assert(subSlice.Length >= 0, "length={0}", subSlice.Length);
}
private IEnumerable <long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map) { int readerUpto = -1; int docIDUpto = 0; AtomicReader currentReader = null; IBits currentLiveDocs = null; var ords = new long[8]; int ordUpto = 0; int ordLength = 0; while (true) { if (readerUpto == readers.Length) { yield break; } if (ordUpto < ordLength) { var value = ords[ordUpto]; ordUpto++; yield return(value); continue; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < readers.Length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { if (Debugging.AssertsEnabled) { Debugging.Assert(docIDUpto < currentReader.MaxDoc); } SortedSetDocValues dv = dvs[readerUpto]; dv.SetDocument(docIDUpto); ordUpto = ordLength = 0; long ord; while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { if (ordLength == ords.Length) { ords = ArrayUtil.Grow(ords, ordLength + 1); } ords[ordLength] = map.GetGlobalOrd(readerUpto, ord); ordLength++; } docIDUpto++; continue; } docIDUpto++; } }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new DisposableThreadLocal <StoredFieldsReader>(() => (StoredFieldsReader)fieldsReaderOrig.Clone()); termVectorsLocal = new DisposableThreadLocal <TermVectorsReader>(() => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone()); if (termsIndexDivisor == 0) { throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(fields != null); } // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(normsProducer != null); } } else { normsProducer = null; } fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) { var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION); var output = directory.CreateOutput(fileName, context); var scratch = new BytesRef(); var success = false; try { SimpleTextUtil.Write(output, NUMFIELDS); SimpleTextUtil.Write(output, infos.Count.ToString(CultureInfo.InvariantCulture), scratch); SimpleTextUtil.WriteNewline(output); foreach (FieldInfo fi in infos) { SimpleTextUtil.Write(output, NAME); SimpleTextUtil.Write(output, fi.Name, scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, NUMBER); SimpleTextUtil.Write(output, fi.Number.ToString(CultureInfo.InvariantCulture), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, ISINDEXED); SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.IsIndexed.ToString()), scratch); SimpleTextUtil.WriteNewline(output); if (fi.IsIndexed) { // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare() if (Debugging.AssertsEnabled) { Debugging.Assert(IndexOptionsComparer.Default.Compare(fi.IndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads); } SimpleTextUtil.Write(output, INDEXOPTIONS); SimpleTextUtil.Write(output, fi.IndexOptions != IndexOptions.NONE ? fi.IndexOptions.ToString() : string.Empty, scratch); SimpleTextUtil.WriteNewline(output); } SimpleTextUtil.Write(output, STORETV); SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.HasVectors.ToString()), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, PAYLOADS); SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower(fi.HasPayloads.ToString()), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, NORMS); SimpleTextUtil.Write(output, CultureInfo.InvariantCulture.TextInfo.ToLower((!fi.OmitsNorms).ToString()), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, NORMS_TYPE); SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, DOCVALUES); SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, DOCVALUES_GEN); SimpleTextUtil.Write(output, fi.DocValuesGen.ToString(CultureInfo.InvariantCulture), scratch); SimpleTextUtil.WriteNewline(output); IDictionary <string, string> atts = fi.Attributes; int numAtts = atts is null ? 0 : atts.Count; SimpleTextUtil.Write(output, NUM_ATTS); SimpleTextUtil.Write(output, numAtts.ToString(CultureInfo.InvariantCulture), scratch); SimpleTextUtil.WriteNewline(output); if (numAtts <= 0 || atts is null) { continue; } foreach (var entry in atts) { SimpleTextUtil.Write(output, ATT_KEY); SimpleTextUtil.Write(output, entry.Key, scratch); SimpleTextUtil.WriteNewline(output); SimpleTextUtil.Write(output, ATT_VALUE); SimpleTextUtil.Write(output, entry.Value, scratch); SimpleTextUtil.WriteNewline(output); } } SimpleTextUtil.WriteChecksum(output, scratch); success = true; } finally { if (success) { output.Dispose(); } else { IOUtils.DisposeWhileHandlingException(output); } } }
public override ThreadState GetAndLock(Thread requestingThread, DocumentsWriter documentsWriter) { ThreadState threadState = null; if (NumThreadStatesActive == 0) { lock (this) { if (NumThreadStatesActive == 0) { threadState = states[0] = NewThreadState(); return(threadState); } } } if (Debugging.AssertsEnabled) { Debugging.Assert(NumThreadStatesActive > 0); } for (int i = 0; i < maxRetry; i++) { int ord = random.Next(NumThreadStatesActive); lock (this) { threadState = states[ord]; if (Debugging.AssertsEnabled) { Debugging.Assert(threadState != null); } } if (threadState.TryLock()) { return(threadState); } if (random.Next(20) == 0) { break; } } /* * only try to create a new threadstate if we can not lock the randomly * selected state. this is important since some tests rely on a single * threadstate in the single threaded case. Eventually it would be nice if * we would not have this limitation but for now we just make sure we only * allocate one threadstate if indexing is single threaded */ lock (this) { ThreadState newThreadState = NewThreadState(); if (newThreadState != null) // did we get a new state? { threadState = states[NumThreadStatesActive - 1] = newThreadState; //if (Debugging.AssertsEnabled) Debugging.Assert(threadState.HeldByCurrentThread); return(threadState); } // if no new state is available lock the random one } if (Debugging.AssertsEnabled) { Debugging.Assert(threadState != null); } threadState.@Lock(); return(threadState); }
/// <summary> /// Determinizes the given automaton. /// <para/> /// Worst case complexity: exponential in number of states. /// </summary> public static void Determinize(Automaton a) { if (a.IsDeterministic || a.IsSingleton) { return; } State[] allStates = a.GetNumberedStates(); // subset construction bool initAccept = a.initial.accept; int initNumber = a.initial.number; a.initial = new State(); SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial); Queue <SortedInt32Set.FrozenInt32Set> worklist = new Queue <SortedInt32Set.FrozenInt32Set>(); // LUCENENET specific - Queue is much more performant than LinkedList IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>(); worklist.Enqueue(initialset); a.initial.accept = initAccept; newstate[initialset] = a.initial; int newStateUpto = 0; State[] newStatesArray = new State[5]; newStatesArray[newStateUpto] = a.initial; a.initial.number = newStateUpto; newStateUpto++; // like Set<Integer,PointTransitions> PointTransitionSet points = new PointTransitionSet(); // like SortedMap<Integer,Integer> SortedInt32Set statesSet = new SortedInt32Set(5); while (worklist.Count > 0) { SortedInt32Set.FrozenInt32Set s = worklist.Dequeue(); //worklist.Remove(s); // Collate all outgoing transitions by min/1+max: for (int i = 0; i < s.values.Length; i++) { State s0 = allStates[s.values[i]]; for (int j = 0; j < s0.numTransitions; j++) { points.Add(s0.TransitionsArray[j]); } } if (points.count == 0) { // No outgoing transitions -- skip it continue; } points.Sort(); int lastPoint = -1; int accCount = 0; State r = s.state; for (int i = 0; i < points.count; i++) { int point = points.points[i].point; if (statesSet.upto > 0) { if (Debugging.AssertsEnabled) { Debugging.Assert(lastPoint != -1); } statesSet.ComputeHash(); if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out State q) || q == null) { q = new State(); SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q); worklist.Enqueue(p); if (newStateUpto == newStatesArray.Length) { // LUCENENET: Resize rather than copy Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)); } newStatesArray[newStateUpto] = q; q.number = newStateUpto; newStateUpto++; q.accept = accCount > 0; newstate[p] = q; } else { if (Debugging.AssertsEnabled) { Debugging.Assert((accCount > 0) == q.accept, "accCount={0} vs existing accept={1} states={2}", accCount, q.accept, statesSet); } } r.AddTransition(new Transition(lastPoint, point - 1, q)); } // process transitions that end on this point // (closes an overlapping interval) Transition[] transitions = points.points[i].ends.transitions; int limit = points.points[i].ends.count; for (int j = 0; j < limit; j++) { Transition t = transitions[j]; int num = t.to.number; statesSet.Decr(num); accCount -= t.to.accept ? 1 : 0; } points.points[i].ends.count = 0; // process transitions that start on this point // (opens a new interval) transitions = points.points[i].starts.transitions; limit = points.points[i].starts.count; for (int j = 0; j < limit; j++) { Transition t = transitions[j]; int num = t.to.number; statesSet.Incr(num); accCount += t.to.accept ? 1 : 0; } lastPoint = point; points.points[i].starts.count = 0; } points.Reset(); if (Debugging.AssertsEnabled) { Debugging.Assert(statesSet.upto == 0, "upto={0}", statesSet.upto); } } a.deterministic = true; a.SetNumberedStates(newStatesArray, newStateUpto); }
/// <summary> /// Builds an <see cref="SynonymMap"/> and returns it. /// </summary> public virtual SynonymMap Build() { ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton; // TODO: are we using the best sharing options? var builder = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs); BytesRef scratch = new BytesRef(64); ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput(); ISet <int> dedupSet; if (dedup) { dedupSet = new JCG.HashSet <int>(); } else { dedupSet = null; } var spare = new byte[5]; ICollection <CharsRef> keys = workingSet.Keys; CharsRef[] sortedKeys = new CharsRef[keys.Count]; keys.CopyTo(sortedKeys, 0); #pragma warning disable 612, 618 System.Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparer); #pragma warning restore 612, 618 Int32sRef scratchIntsRef = new Int32sRef(); //System.out.println("fmap.build"); for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++) { CharsRef input = sortedKeys[keyIdx]; MapEntry output = workingSet[input]; int numEntries = output.ords.Count; // output size, assume the worst case int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry scratch.Grow(estimatedSize); scratchOutput.Reset(scratch.Bytes, scratch.Offset, scratch.Bytes.Length); if (Debugging.AssertsEnabled) { Debugging.Assert(scratch.Offset == 0); } // now write our output data: int count = 0; for (int i = 0; i < numEntries; i++) { if (dedupSet != null) { // LUCENENET specific - no boxing happening here int ent = output.ords[i]; if (dedupSet.Contains(ent)) { continue; } dedupSet.Add(ent); } scratchOutput.WriteVInt32(output.ords[i]); count++; } int pos = scratchOutput.Position; scratchOutput.WriteVInt32(count << 1 | (output.includeOrig ? 0 : 1)); int pos2 = scratchOutput.Position; int vIntLen = pos2 - pos; // Move the count + includeOrig to the front of the byte[]: Array.Copy(scratch.Bytes, pos, spare, 0, vIntLen); Array.Copy(scratch.Bytes, 0, scratch.Bytes, vIntLen, pos); Array.Copy(spare, 0, scratch.Bytes, 0, vIntLen); if (dedupSet != null) { dedupSet.Clear(); } scratch.Length = scratchOutput.Position - scratch.Offset; //System.out.println(" add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count); builder.Add(Lucene.Net.Util.Fst.Util.ToUTF32(input.ToString(), scratchIntsRef), BytesRef.DeepCopyOf(scratch)); } FST <BytesRef> fst = builder.Finish(); return(new SynonymMap(fst, words, maxHorizontalContext)); }
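// A small sketch of the header packing used above: the entry count is shifted left one bit
// and the low bit records whether the original token is kept (0 = keep, 1 = drop). The VInt
// codec below is a generic varint written only for illustration; it is not the Lucene.NET
// DataOutput/ByteArrayDataOutput API.
using System;
using System.Collections.Generic;

public static class SynonymHeaderSketch
{
    public static void WriteVInt(List<byte> dest, int value)
    {
        uint v = (uint)value;
        while (v >= 0x80) { dest.Add((byte)(v | 0x80)); v >>= 7; }
        dest.Add((byte)v);
    }

    public static int ReadVInt(IReadOnlyList<byte> src, ref int pos)
    {
        int result = 0, shift = 0;
        while (true)
        {
            byte b = src[pos++];
            result |= (b & 0x7F) << shift;
            if ((b & 0x80) == 0) return result;
            shift += 7;
        }
    }

    public static void Main()
    {
        var bytes = new List<byte>();
        int count = 3;
        bool includeOrig = false;
        WriteVInt(bytes, count << 1 | (includeOrig ? 0 : 1)); // header: count + includeOrig flag
        foreach (int ord in new[] { 7, 12, 40 }) WriteVInt(bytes, ord);

        int pos = 0;
        int header = ReadVInt(bytes, ref pos);
        Console.WriteLine($"count={header >> 1}, includeOrig={(header & 1) == 0}");
        while (pos < bytes.Count) Console.WriteLine($"ord={ReadVInt(bytes, ref pos)}");
    }
}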
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) { UninterruptableMonitor.Enter(this); try { if (Debugging.AssertsEnabled) { Debugging.Assert(!UninterruptableMonitor.IsEntered(writer)); } this.m_writer = writer; InitMergeThreadPriority(); m_dir = writer.Directory; // First, quickly run through the newly proposed merges // and add any orthogonal merges (ie a merge not // involving segments already pending to be merged) to // the queue. If we are way behind on merging, many of // these newly proposed merges will likely already be // registered. if (IsVerbose) { Message("now merge"); Message(" index: " + writer.SegString()); } // Iterate, pulling from the IndexWriter's queue of // pending merges, until it's empty: while (true) { long startStallTime = 0; while (writer.HasPendingMerges() && MergeThreadCount >= maxMergeCount) { // this means merging has fallen too far behind: we // have already created maxMergeCount threads, and // now there's at least one more merge pending. // Note that only maxThreadCount of // those created merge threads will actually be // running; the rest will be paused (see // updateMergeThreads). We stall this producer // thread to prevent creation of new segments, // until merging has caught up: startStallTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results if (IsVerbose) { Message(" too many merges; stalling..."); } try { UninterruptableMonitor.Wait(this); } catch (Exception ie) when(ie.IsInterruptedException()) { throw new Util.ThreadInterruptedException(ie); } } if (IsVerbose) { if (startStallTime != 0) { Message(" stalled for " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startStallTime) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } } MergePolicy.OneMerge merge = writer.NextMerge(); if (merge == null) { if (IsVerbose) { Message(" no more merges pending; now return"); } return; } bool success = false; try { if (IsVerbose) { Message(" consider merge " + writer.SegString(merge.Segments)); } // OK to spawn a new merge thread to handle this // merge: MergeThread merger = GetMergeThread(writer, merge); m_mergeThreads.Add(merger); if (IsVerbose) { Message(" launch new thread [" + merger.Name + "]"); } merger.Start(); // Must call this after starting the thread else // the new thread is removed from mergeThreads // (since it's not alive yet): UpdateMergeThreads(); success = true; } finally { if (!success) { writer.MergeFinish(merge); } } } } finally { UninterruptableMonitor.Exit(this); } }
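// A stripped-down sketch of the stall/backpressure pattern above: the producer waits on the
// scheduler's monitor while too many worker threads are running, and workers pulse it when
// they finish. The names are hypothetical; this is not the Lucene.NET ConcurrentMergeScheduler API.
using System;
using System.Threading;

public sealed class BoundedWorkScheduler
{
    private readonly object gate = new object();
    private readonly int maxWorkers;
    private int runningWorkers;

    public BoundedWorkScheduler(int maxWorkers) => this.maxWorkers = maxWorkers;

    public void Submit(Action work)
    {
        lock (gate)
        {
            // Stall the producer until a worker slot frees up.
            while (runningWorkers >= maxWorkers)
            {
                Monitor.Wait(gate);
            }
            runningWorkers++;
        }

        var worker = new Thread(() =>
        {
            try { work(); }
            finally
            {
                lock (gate)
                {
                    runningWorkers--;
                    Monitor.PulseAll(gate); // wake any stalled producer
                }
            }
        });
        worker.IsBackground = true;
        worker.Start();
    }
}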
/// <summary> /// Compute the intersection of the provided sets. This method is much faster than /// computing the intersection manually since it operates directly at the byte level. /// </summary> public static WAH8DocIdSet Intersect(ICollection <WAH8DocIdSet> docIdSets, int indexInterval) { // LUCENENET: Added guard clause for null if (docIdSets is null) { throw new ArgumentNullException(nameof(docIdSets)); } switch (docIdSets.Count) { case 0: throw new ArgumentException("There must be at least one set to intersect"); case 1: return(docIdSets.First()); } // The logic below is similar to ConjunctionScorer int numSets = docIdSets.Count; var iterators = new Iterator[numSets]; int i = 0; foreach (WAH8DocIdSet set in docIdSets) { var it = (Iterator)set.GetIterator(); iterators[i++] = it; } Array.Sort(iterators, SERIALIZED_LENGTH_COMPARER); WordBuilder builder = (WordBuilder)(new WordBuilder()).SetIndexInterval(indexInterval); int wordNum = 0; while (true) { // Advance the least costly iterator first iterators[0].AdvanceWord(wordNum); wordNum = iterators[0].wordNum; if (wordNum == DocIdSetIterator.NO_MORE_DOCS) { break; } byte word = iterators[0].word; for (i = 1; i < numSets; ++i) { if (iterators[i].wordNum < wordNum) { iterators[i].AdvanceWord(wordNum); } if (iterators[i].wordNum > wordNum) { wordNum = iterators[i].wordNum; goto mainContinue; } if (Debugging.AssertsEnabled) { Debugging.Assert(iterators[i].wordNum == wordNum); } word &= iterators[i].word; if (word == 0) { // There are common words, but they don't share any bit ++wordNum; goto mainContinue; } } // Found a common word if (Debugging.AssertsEnabled) { Debugging.Assert(word != 0); } builder.AddWord(wordNum, word); ++wordNum; mainContinue :; } //mainBreak: return(builder.Build()); }
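// A compact sketch of the same word-level conjunction: each set is modeled as a sparse map
// from word number to an 8-bit word, and the scan is driven from the smallest set before
// AND-ing the words, mirroring "advance the least costly iterator first". Purely
// illustrative; this is not the WAH8DocIdSet wire format or iterator API.
using System;
using System.Collections.Generic;
using System.Linq;

public static class WordIntersectSketch
{
    public static SortedDictionary<int, byte> Intersect(params SortedDictionary<int, byte>[] sets)
    {
        if (sets.Length == 0) throw new ArgumentException("need at least one set");
        var result = new SortedDictionary<int, byte>();
        var smallest = sets.OrderBy(s => s.Count).First();
        foreach (var kv in smallest)
        {
            byte word = kv.Value;
            bool allHave = true;
            foreach (var other in sets)
            {
                if (ReferenceEquals(other, smallest)) continue;
                if (!other.TryGetValue(kv.Key, out byte w)) { allHave = false; break; }
                word &= w;
                if (word == 0) { allHave = false; break; } // common word number, but no shared bit
            }
            if (allHave && word != 0) result[kv.Key] = word;
        }
        return result;
    }

    public static void Main()
    {
        var a = new SortedDictionary<int, byte> { [0] = 0b_1010_0110, [3] = 0b_0000_1111 };
        var b = new SortedDictionary<int, byte> { [0] = 0b_0110_0011, [2] = 0b_1111_0000 };
        foreach (var kv in Intersect(a, b))
            Console.WriteLine($"word {kv.Key}: 0b{Convert.ToString(kv.Value, 2).PadLeft(8, '0')}");
        // word 0: 0b00100010
    }
}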
/// <summary> /// Given a <paramref name="target"/> value, advance the decoding index to the first bigger or equal value /// and return it if it is available. Otherwise return <see cref="NO_MORE_VALUES"/>. /// <para/> /// The current implementation uses the index on the upper zero bit positions. /// </summary> public virtual long AdvanceToValue(long target) { efIndex += 1; if (efIndex >= numEncoded) { return(NO_MORE_VALUES); } setBitForIndex += 1; // the high bit at setBitForIndex belongs to the unary code for efIndex int highIndex = (int)((long)((ulong)setBitForIndex >> LOG2_INT64_SIZE)); long upperLong = efEncoder.upperLongs[highIndex]; curHighLong = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1)))); // may contain the unary 1 bit for efIndex // determine index entry to advance to long highTarget = (long)((ulong)target >> efEncoder.numLowBits); long indexEntryIndex = (highTarget / efEncoder.indexInterval) - 1; if (indexEntryIndex >= 0) // not before first index entry { if (indexEntryIndex >= numIndexEntries) { indexEntryIndex = numIndexEntries - 1; // no further than last index entry } long indexHighValue = (indexEntryIndex + 1) * efEncoder.indexInterval; if (Debugging.AssertsEnabled) { Debugging.Assert(indexHighValue <= highTarget); } if (indexHighValue > (setBitForIndex - efIndex)) // advance to just after zero bit position of index entry. { setBitForIndex = UnPackValue(efEncoder.upperZeroBitPositionIndex, efEncoder.nIndexEntryBits, indexEntryIndex, indexMask); efIndex = setBitForIndex - indexHighValue; // the high bit at setBitForIndex belongs to the unary code for efIndex highIndex = (int)(((ulong)setBitForIndex >> LOG2_INT64_SIZE)); upperLong = efEncoder.upperLongs[highIndex]; curHighLong = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1)))); // may contain the unary 1 bit for efIndex } if (Debugging.AssertsEnabled) { Debugging.Assert(efIndex < numEncoded); // there is a high value to be found. } } int curSetBits = curHighLong.PopCount(); // shifted right. int curClearBits = (sizeof(long) * 8) - curSetBits - ((int)(setBitForIndex & ((sizeof(long) * 8) - 1))); // subtract right shift, may be more than encoded while (((setBitForIndex - efIndex) + curClearBits) < highTarget) { // curHighLong has not enough clear bits to reach highTarget efIndex += curSetBits; if (efIndex >= numEncoded) { return(NO_MORE_VALUES); } setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1)); // highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE); if (Debugging.AssertsEnabled) { Debugging.Assert((highIndex + 1) == (int)((long)((ulong)setBitForIndex >> LOG2_INT64_SIZE))); } highIndex += 1; upperLong = efEncoder.upperLongs[highIndex]; curHighLong = upperLong; curSetBits = curHighLong.PopCount(); curClearBits = (sizeof(long) * 8) - curSetBits; } // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits. while (curHighLong == 0L) { setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1)); if (Debugging.AssertsEnabled) { Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE)); } highIndex += 1; upperLong = efEncoder.upperLongs[highIndex]; curHighLong = upperLong; } // curHighLong has enough clear bits to reach highTarget, has at least 1 set bit, and may not have enough set bits. int rank = (int)(highTarget - (setBitForIndex - efIndex)); // the rank of the zero bit for highValue. 
if (Debugging.AssertsEnabled) { Debugging.Assert((rank <= (sizeof(long) * 8)), "rank {0}", rank); } if (rank >= 1) { long invCurHighLong = ~curHighLong; int clearBitForValue = (rank <= 8) ? BroadWord.SelectNaive(invCurHighLong, rank) : BroadWord.Select(invCurHighLong, rank); if (Debugging.AssertsEnabled) { Debugging.Assert(clearBitForValue <= ((sizeof(long) * 8) - 1)); } setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero int oneBitsBeforeClearBit = clearBitForValue - rank + 1; efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex if (efIndex >= numEncoded) { return(NO_MORE_VALUES); } if ((setBitForIndex & ((sizeof(long) * 8) - 1)) == 0L) // exhausted curHighLong { if (Debugging.AssertsEnabled) { Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE)); } highIndex += 1; upperLong = efEncoder.upperLongs[highIndex]; curHighLong = upperLong; } else { if (Debugging.AssertsEnabled) { Debugging.Assert(highIndex == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE)); } curHighLong = (long)((ulong)upperLong >> ((int)(setBitForIndex & ((sizeof(long) * 8) - 1)))); } // curHighLong has enough clear bits to reach highTarget, and may not have enough set bits. while (curHighLong == 0L) { setBitForIndex += (sizeof(long) * 8) - (setBitForIndex & ((sizeof(long) * 8) - 1)); if (Debugging.AssertsEnabled) { Debugging.Assert((highIndex + 1) == (int)((ulong)setBitForIndex >> LOG2_INT64_SIZE)); } highIndex += 1; upperLong = efEncoder.upperLongs[highIndex]; curHighLong = upperLong; } } setBitForIndex += curHighLong.TrailingZeroCount(); if (Debugging.AssertsEnabled) { Debugging.Assert((setBitForIndex - efIndex) >= highTarget); // highTarget reached } // Linear search also with low values long currentValue = CombineHighLowValues((setBitForIndex - efIndex), CurrentLowValue()); while (currentValue < target) { currentValue = NextValue(); if (currentValue == NO_MORE_VALUES) { return(NO_MORE_VALUES); } } return(currentValue); }
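// A minimal Elias-Fano sketch in plain C#: each monotone value is split into numLowBits low
// bits (stored directly) and a high part (stored in unary as a bit sequence), which is the
// structure the upperLongs traversal above walks. This is a simplified model with a
// sequential decoder only, not the Lucene.NET EliasFanoEncoder/EliasFanoDecoder API.
using System;
using System.Collections.Generic;

public sealed class TinyEliasFano
{
    private readonly int numLowBits;
    private readonly long lowMask;
    private readonly List<long> lows = new List<long>();
    private readonly List<bool> upperBits = new List<bool>(); // unary-coded high parts
    private long lastHigh = 0;

    public TinyEliasFano(int numLowBits)
    {
        this.numLowBits = numLowBits;
        this.lowMask = (1L << numLowBits) - 1;
    }

    public void Encode(long value) // values must be non-decreasing
    {
        long high = value >> numLowBits;
        for (long i = lastHigh; i < high; i++) upperBits.Add(false); // a zero bit raises the high value
        upperBits.Add(true);                                         // a one bit marks an encoded value
        lows.Add(value & lowMask);
        lastHigh = high;
    }

    public IEnumerable<long> Decode()
    {
        long high = 0;
        int index = 0;
        foreach (bool bit in upperBits)
        {
            if (bit) yield return (high << numLowBits) | lows[index++];
            else high++;
        }
    }

    public static void Main()
    {
        var ef = new TinyEliasFano(numLowBits: 3);
        foreach (long v in new long[] { 2, 3, 5, 7, 11, 13, 24 }) ef.Encode(v);
        Console.WriteLine(string.Join(", ", ef.Decode())); // 2, 3, 5, 7, 11, 13, 24
    }
}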
internal virtual void AddWord(int wordNum, byte word) { if (Debugging.AssertsEnabled) { Debugging.Assert(wordNum > lastWordNum); Debugging.Assert(word != 0); } if (!reverse) { if (lastWordNum == -1) { clean = 2 + wordNum; // special case for the 1st sequence dirtyWords.WriteByte(word); } else { switch (wordNum - lastWordNum) { case 1: if (word == 0xFF && (byte)dirtyWords.Bytes[dirtyWords.Length - 1] == 0xFF) { --dirtyWords.Length; WriteSequence(); reverse = true; clean = 2; } else { dirtyWords.WriteByte(word); } break; case 2: dirtyWords.WriteByte(0); dirtyWords.WriteByte(word); break; default: WriteSequence(); clean = wordNum - lastWordNum - 1; dirtyWords.WriteByte(word); break; } } } else { if (Debugging.AssertsEnabled) { Debugging.Assert(lastWordNum >= 0); } switch (wordNum - lastWordNum) { case 1: if (word == 0xFF) { if (dirtyWords.Length == 0) { ++clean; } else if ((byte)dirtyWords.Bytes[dirtyWords.Length - 1] == 0xFF) { --dirtyWords.Length; WriteSequence(); clean = 2; } else { dirtyWords.WriteByte(word); } } else { dirtyWords.WriteByte(word); } break; case 2: dirtyWords.WriteByte(0); dirtyWords.WriteByte(word); break; default: WriteSequence(); reverse = false; clean = wordNum - lastWordNum - 1; dirtyWords.WriteByte(word); break; } } lastWordNum = wordNum; cardinality += BitUtil.BitCount(word); }
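// A toy version of the clean/dirty run encoding that AddWord maintains above: all-zero words
// collapse into a "clean" run length and everything else is appended verbatim as "dirty"
// bytes. Only the zero-run case is modeled here; the real builder also tracks runs of 0xFF
// words via the reverse flag. All names are hypothetical.
using System;
using System.Collections.Generic;

public static class CleanDirtySketch
{
    public readonly struct Sequence
    {
        public Sequence(int clean, byte[] dirty) { Clean = clean; Dirty = dirty; }
        public int Clean { get; }      // number of all-zero words preceding the dirty bytes
        public byte[] Dirty { get; }   // the non-zero words that follow
    }

    public static List<Sequence> Encode(IReadOnlyList<byte> words)
    {
        var sequences = new List<Sequence>();
        int clean = 0;
        var dirty = new List<byte>();
        foreach (byte w in words)
        {
            if (w == 0 && dirty.Count == 0)
            {
                clean++;               // still inside the leading clean run
            }
            else if (w == 0)
            {
                // A zero word after dirty bytes closes the current sequence.
                sequences.Add(new Sequence(clean, dirty.ToArray()));
                dirty.Clear();
                clean = 1;
            }
            else
            {
                dirty.Add(w);
            }
        }
        if (dirty.Count > 0 || clean > 0) sequences.Add(new Sequence(clean, dirty.ToArray()));
        return sequences;
    }

    public static void Main()
    {
        foreach (var s in Encode(new byte[] { 0, 0, 0, 0x3C, 0xFF, 0, 0, 0x01 }))
            Console.WriteLine($"clean={s.Clean} dirty=[{string.Join(",", s.Dirty)}]");
        // clean=3 dirty=[60,255]
        // clean=2 dirty=[1]
    }
}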
internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state) { if (!fieldInfo.IsIndexed) { return; // nothing to flush, don't bother the codec with the unindexed field } TermsConsumer termsConsumer = consumer.AddField(fieldInfo); IComparer <BytesRef> termComp = termsConsumer.Comparer; // CONFUSING: this.indexOptions holds the index options // that were current when we first saw this field. But // it's possible this has changed, eg when other // documents are indexed that cause a "downgrade" of the // IndexOptions. So we must decode the in-RAM buffer // according to this.indexOptions, but then write the // new segment to the directory according to // currentFieldIndexOptions: IndexOptions currentFieldIndexOptions = fieldInfo.IndexOptions; if (Debugging.AssertsEnabled) { Debugging.Assert(currentFieldIndexOptions != IndexOptions.NONE); } // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare() bool writeTermFreq = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS) >= 0; bool writePositions = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; bool writeOffsets = IndexOptionsComparer.Default.Compare(currentFieldIndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; bool readTermFreq = this.hasFreq; bool readPositions = this.hasProx; bool readOffsets = this.hasOffsets; //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets); // Make sure FieldInfo.update is working correctly!: if (Debugging.AssertsEnabled) { Debugging.Assert(!writeTermFreq || readTermFreq); Debugging.Assert(!writePositions || readPositions); Debugging.Assert(!writeOffsets || readOffsets); Debugging.Assert(!writeOffsets || writePositions); } IDictionary <Term, int?> segDeletes; if (state.SegUpdates != null && state.SegUpdates.terms.Count > 0) { segDeletes = state.SegUpdates.terms; } else { segDeletes = null; } int[] termIDs = termsHashPerField.SortPostings(termComp); int numTerms = termsHashPerField.bytesHash.Count; BytesRef text = new BytesRef(); FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray; ByteSliceReader freq = new ByteSliceReader(); ByteSliceReader prox = new ByteSliceReader(); FixedBitSet visitedDocs = new FixedBitSet(state.SegmentInfo.DocCount); long sumTotalTermFreq = 0; long sumDocFreq = 0; Term protoTerm = new Term(fieldName); for (int i = 0; i < numTerms; i++) { int termID = termIDs[i]; // Get BytesRef int textStart = postings.textStarts[termID]; termsHashPerField.bytePool.SetBytesRef(text, textStart); termsHashPerField.InitReader(freq, termID, 0); if (readPositions || readOffsets) { termsHashPerField.InitReader(prox, termID, 1); } // TODO: really TermsHashPerField should take over most // of this loop, including merge sort of terms from // multiple threads and interacting with the // TermsConsumer, only calling out to us (passing us the // DocsConsumer) to handle delivery of docs/positions PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text); int?delDocLimit; if (segDeletes != null) { protoTerm.Bytes = text; if (segDeletes.TryGetValue(protoTerm, out int?docIDUpto) && docIDUpto != null) { delDocLimit = docIDUpto; } else { delDocLimit = 0; } } else { delDocLimit = 0; } // Now termStates has numToMerge FieldMergeStates // which all share the same term. Now we must // interleave the docID streams. 
int docFreq = 0; long totalTermFreq = 0; int docID = 0; while (true) { //System.out.println(" cycle"); int termFreq; if (freq.Eof()) { if (postings.lastDocCodes[termID] != -1) { // Return last doc docID = postings.lastDocIDs[termID]; if (readTermFreq) { termFreq = postings.termFreqs[termID]; } else { termFreq = -1; } postings.lastDocCodes[termID] = -1; } else { // EOF break; } } else { int code = freq.ReadVInt32(); if (!readTermFreq) { docID += code; termFreq = -1; } else { docID += code.TripleShift(1); if ((code & 1) != 0) { termFreq = 1; } else { termFreq = freq.ReadVInt32(); } } if (Debugging.AssertsEnabled) { Debugging.Assert(docID != postings.lastDocIDs[termID]); } } docFreq++; if (Debugging.AssertsEnabled) { Debugging.Assert(docID < state.SegmentInfo.DocCount, "doc={0} maxDoc={1}", docID, state.SegmentInfo.DocCount); } // NOTE: we could check here if the docID was // deleted, and skip it. However, this is somewhat // dangerous because it can yield non-deterministic // behavior since we may see the docID before we see // the term that caused it to be deleted. this // would mean some (but not all) of its postings may // make it into the index, which'd alter the docFreq // for those terms. We could fix this by doing two // passes, ie first sweep marks all del docs, and // 2nd sweep does the real flush, but I suspect // that'd add too much time to flush. visitedDocs.Set(docID); postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1); if (docID < delDocLimit) { // Mark it deleted. TODO: we could also skip // writing its postings; this would be // deterministic (just for this Term's docs). // TODO: can we do this reach-around in a cleaner way???? if (state.LiveDocs == null) { state.LiveDocs = docState.docWriter.codec.LiveDocsFormat.NewLiveDocs(state.SegmentInfo.DocCount); } if (state.LiveDocs.Get(docID)) { state.DelCountOnFlush++; state.LiveDocs.Clear(docID); } } totalTermFreq += termFreq; // Carefully copy over the prox + payload info, // changing the format to match Lucene's segment // format. if (readPositions || readOffsets) { // we did record positions (& maybe payload) and/or offsets int position = 0; int offset = 0; for (int j = 0; j < termFreq; j++) { BytesRef thisPayload; if (readPositions) { int code = prox.ReadVInt32(); position += code.TripleShift(1); if ((code & 1) != 0) { // this position has a payload int payloadLength = prox.ReadVInt32(); if (payload == null) { payload = new BytesRef(); payload.Bytes = new byte[payloadLength]; } else if (payload.Bytes.Length < payloadLength) { payload.Grow(payloadLength); } prox.ReadBytes(payload.Bytes, 0, payloadLength); payload.Length = payloadLength; thisPayload = payload; } else { thisPayload = null; } if (readOffsets) { int startOffset = offset + prox.ReadVInt32(); int endOffset = startOffset + prox.ReadVInt32(); if (writePositions) { if (writeOffsets) { if (Debugging.AssertsEnabled) { Debugging.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset={0},endOffset={1},offset={2}", startOffset, endOffset, offset); } postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset); } else { postingsConsumer.AddPosition(position, thisPayload, -1, -1); } } offset = startOffset; } else if (writePositions) { postingsConsumer.AddPosition(position, thisPayload, -1, -1); } } } } postingsConsumer.FinishDoc(); } termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1)); sumTotalTermFreq += totalTermFreq; sumDocFreq += docFreq; } termsConsumer.Finish(writeTermFreq ? 
sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality()); }
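// A small sketch of the doc/freq stream decoding used in the flush loop above: each entry
// stores the doc-ID delta shifted left one bit, with the low bit meaning "frequency is
// exactly 1" (otherwise the frequency follows as its own VInt). The byte source here is a
// plain queue, not Lucene.NET's ByteSliceReader, and the VInt helpers are illustrative only.
using System;
using System.Collections.Generic;

public static class DocFreqStreamSketch
{
    private static void WriteVInt(List<byte> dest, int value)
    {
        uint v = (uint)value;
        while (v >= 0x80) { dest.Add((byte)(v | 0x80)); v >>= 7; }
        dest.Add((byte)v);
    }

    private static int ReadVInt(Queue<byte> src)
    {
        int result = 0, shift = 0;
        while (true)
        {
            byte b = src.Dequeue();
            result |= (b & 0x7F) << shift;
            if ((b & 0x80) == 0) return result;
            shift += 7;
        }
    }

    public static void Main()
    {
        // Encode postings (docID, freq): (5, 1), (9, 3), (42, 1)
        var bytes = new List<byte>();
        int lastDoc = 0;
        foreach (var (doc, freq) in new[] { (5, 1), (9, 3), (42, 1) })
        {
            int delta = doc - lastDoc;
            if (freq == 1) WriteVInt(bytes, delta << 1 | 1);
            else { WriteVInt(bytes, delta << 1); WriteVInt(bytes, freq); }
            lastDoc = doc;
        }

        // Decode, mirroring the docID += code >>> 1 / (code & 1) logic above.
        var src = new Queue<byte>(bytes);
        int docID = 0;
        while (src.Count > 0)
        {
            int code = ReadVInt(src);
            docID += code >> 1;                        // the delta is non-negative, so >> is safe here
            int termFreq = (code & 1) != 0 ? 1 : ReadVInt(src);
            Console.WriteLine($"doc={docID} freq={termFreq}");
        }
    }
}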
/// <summary> /// Build a new <see cref="WAH8DocIdSet"/>. </summary> public virtual WAH8DocIdSet Build() { if (cardinality == 0) { if (Debugging.AssertsEnabled) { Debugging.Assert(lastWordNum == -1); } return(EMPTY); } WriteSequence(); byte[] data = Arrays.CopyOf(@out.Bytes, @out.Length); // Now build the index int valueCount = (numSequences - 1) / indexInterval + 1; MonotonicAppendingInt64Buffer indexPositions, indexWordNums; if (valueCount <= 1) { indexPositions = indexWordNums = SINGLE_ZERO_BUFFER; } else { const int pageSize = 128; int initialPageCount = (valueCount + pageSize - 1) / pageSize; MonotonicAppendingInt64Buffer positions = new MonotonicAppendingInt64Buffer(initialPageCount, pageSize, PackedInt32s.COMPACT); MonotonicAppendingInt64Buffer wordNums = new MonotonicAppendingInt64Buffer(initialPageCount, pageSize, PackedInt32s.COMPACT); positions.Add(0L); wordNums.Add(0L); Iterator it = new Iterator(data, cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER); if (Debugging.AssertsEnabled) { Debugging.Assert(it.@in.Position == 0); Debugging.Assert(it.wordNum == -1); } for (int i = 1; i < valueCount; ++i) { // skip indexInterval sequences for (int j = 0; j < indexInterval; ++j) { bool readSequence = it.ReadSequence(); if (Debugging.AssertsEnabled) { Debugging.Assert(readSequence); } it.SkipDirtyBytes(); } int position = it.@in.Position; int wordNum = it.wordNum; positions.Add(position); wordNums.Add(wordNum + 1); } positions.Freeze(); wordNums.Freeze(); indexPositions = positions; indexWordNums = wordNums; } return(new WAH8DocIdSet(data, cardinality, indexInterval, indexPositions, indexWordNums)); }
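// A stripped-down sketch of the index built above: every indexInterval-th sequence gets its
// byte position and word number recorded so a later AdvanceTo can start near the target
// instead of at the beginning of the encoded stream. The two parallel lists stand in for the
// MonotonicAppendingInt64Buffer pair; all names and the exact interval bookkeeping are
// simplified assumptions, not the real builder.
using System;
using System.Collections.Generic;

public static class SkipIndexSketch
{
    public static (List<long> positions, List<long> wordNums) BuildIndex(
        IReadOnlyList<(long bytePosition, long wordNum)> sequences, int indexInterval)
    {
        var positions = new List<long> { 0L };
        var wordNums = new List<long> { 0L };
        for (int i = indexInterval; i < sequences.Count; i += indexInterval)
        {
            positions.Add(sequences[i].bytePosition);
            wordNums.Add(sequences[i].wordNum + 1);
        }
        return (positions, wordNums);
    }

    public static void Main()
    {
        var sequences = new List<(long, long)>();
        for (int i = 0; i < 10; i++) sequences.Add((i * 3L, i * 8L)); // fake positions/word numbers
        var (positions, wordNums) = BuildIndex(sequences, indexInterval: 4);
        Console.WriteLine(string.Join(" ", positions)); // 0 12 24
        Console.WriteLine(string.Join(" ", wordNums));  // 0 33 65
    }
}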
public skyetek_hid(String path) { MyDebugging = new Debugging(); // For viewing results of API calls via Debug.Write. MyDeviceManagement = new DeviceManagement(); MyHid = new Hid(); readBuffer = new Queue<byte>(65); ReadFinished = false; ReadStart = 0; myDevicePathName = path; }
private void CorProcess_OnBreakpoint(object sender, Debugging.CorDebug.CorBreakpointEventArgs e) { //executeMDbgCommand("w"); // var sync = new AutoResetEvent(false); /* O2Thread.mtaThread(() => { var activeThread = CommandBase.Debugger.Processes.Active.Threads.Active; var filename = activeThread.BottomFrame.SourcePosition.Path; var line = activeThread.BottomFrame.SourcePosition.Line; O2Messages.raiseO2MDbgBreakEvent(filename, line); //sync.Set(); });*/ // first find the current thread (we need to use this.ActiveProcess.Threads since e.Thread doesn't give us the info we need //MDbgThread e.Thread.id // if (DateTime.Now.Millisecond < 100) /* if (DI.o2MDbg.LogBreakpointEvent) DI.log.info("*** BREAKPOINT >> " + O2MDbgBreakPoint.getActiveFrameFunctionName(e)); CorFrame activeFrame = e.Thread.ActiveFrame; activeFrame. //var activeThread = this.ActiveProcess.Threads.Active; // MDbgSourcePosition pos = CommandBase.Debugger.Processes.Active.Threads.Active.CurrentSourcePosition; activeFrame.Code.*/ // var filename = "filenameOfBreak"; // int line = 11; //sync.WaitOne(); e.Continue = DI.o2MDbg.AutoContinueOnBreakPointEvent; // can't do this here since CommandBase.Debugger.Processes.Active.Threads.Active is only set after this is executed }