private readonly Query _fromQuery; // Used for equals() only

/// <summary>
/// </summary>
/// <param name="field">The field that should contain terms that are specified in the <paramref name="terms"/> parameter.</param>
/// <param name="fromQuery"></param>
/// <param name="terms">The terms that matching documents should have. The terms must be sorted by natural order.</param>
internal TermsQuery(string field, Query fromQuery, BytesRefHash terms)
    : base(field)
{
    _fromQuery = fromQuery;
    _terms = terms;
    _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
}
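The constructor above assumes the caller has already collected the matching terms into a BytesRefHash; it then sorts them once so the query can visit them in term order. A minimal sketch of that caller-side pattern (not taken from the snippets on this page; the term values are made up for illustration):

BytesRefHash terms = new BytesRefHash(); // manages its own ByteBlockPool
terms.Add(new BytesRef("apple"));
terms.Add(new BytesRef("banana"));
// Sort returns an array of ords ordered by the given comparer; Get fills a reusable BytesRef.
int[] ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
BytesRef spare = new BytesRef();
terms.Get(ords[0], spare); // smallest term in UTF-8/Unicode order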
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    IntPool = termsHash.IntPool;
    BytePool = termsHash.BytePool;
    TermBytePool = termsHash.TermBytePool;
    DocState = termsHash.DocState;
    this.TermsHash = termsHash;
    BytesUsed = termsHash.BytesUsed;
    FieldState = docInverterPerField.FieldState;
    this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
    BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
    StreamCount = Consumer.StreamCount;
    NumPostingInt = 2 * StreamCount;
    this.FieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        NextPerField = null;
    }
}
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    intPool = termsHash.intPool;
    bytePool = termsHash.bytePool;
    termBytePool = termsHash.termBytePool;
    docState = termsHash.docState;
    this.termsHash = termsHash;
    bytesUsed = termsHash.bytesUsed;
    fieldState = docInverterPerField.fieldState;
    this.consumer = termsHash.consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
    bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
    streamCount = consumer.StreamCount;
    numPostingInt = 2 * streamCount;
    this.fieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        nextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        nextPerField = null;
    }
}
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
{
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    hash = new BytesRefHash(
        new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
        BytesRefHash.DEFAULT_CAPACITY,
        new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    pending = new AppendingDeltaPackedInt64Buffer(PackedInt32s.COMPACT);
    bytesUsed = pending.RamBytesUsed();
    iwBytesUsed.AddAndGet(bytesUsed);
}
internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords)
    : base(tenum)
{
    Terms = terms;
    Ords = ords;
    _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
    _lastElement = terms.Size() - 1;
    _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
    _seekTerm = terms.Get(ords[_upto], _spare);
}
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
{
    this.FieldInfo = fieldInfo;
    this.IwBytesUsed = iwBytesUsed;
    Hash = new BytesRefHash(
        new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
        BytesRefHash.DEFAULT_CAPACITY,
        new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    Pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
    PendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
    BytesUsed = Pending.RamBytesUsed() + PendingCounts.RamBytesUsed();
    iwBytesUsed.AddAndGet(BytesUsed);
}
private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms, float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
{
    _field = field;
    _multipleValuesPerDocument = multipleValuesPerDocument;
    _terms = terms;
    _scores = scores;
    _originalQuery = originalQuery;
    _ords = ords;
    _unwrittenOriginalQuery = unwrittenOriginalQuery;
}
public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
{
    this.terms = terms;
    this.sliceArray = sliceArray;
    this.numTokens = numTokens;
    this.numOverlapTokens = numOverlapTokens;
    this.boost = boost;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.lastPosition = lastPosition;
    this.lastOffset = lastOffset;
}
internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms, float[] scores, Query originalQuery)
{
    _field = field;
    _multipleValuesPerDocument = multipleValuesPerDocument;
    _terms = terms;
    _scores = scores;
    _originalQuery = originalQuery;
    _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    _unwrittenOriginalQuery = originalQuery;
}
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    // Get the enum and start visiting terms. If we
    // exhaust the enum before hitting either of the
    // cutoffs, we use ConstantBooleanQueryRewrite; else,
    // ConstantFilterRewrite:
    int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc);
    int termCountLimit = Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff);

    CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);
    CollectTerms(reader, query, col);
    int size = col.pendingTerms.Count;
    if (col.hasCutOff)
    {
        return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.Rewrite(reader, query);
    }
    else
    {
        BooleanQuery bq = GetTopLevelQuery();
        if (size > 0)
        {
            BytesRefHash pendingTerms = col.pendingTerms;
            int[] sort = pendingTerms.Sort(col.termsEnum.Comparer);
            for (int i = 0; i < size; i++)
            {
                int pos = sort[i];
                // docFreq is not used for constant score here, we pass 1
                // to explicitly set a fake value, so it's not calculated
                AddClause(bq, new Term(query.m_field, pendingTerms.Get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
            }
        }
        // Strip scores
        Query result = new ConstantScoreQuery(bq);
        result.Boost = query.Boost;
        return result;
    }
}
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext)
{
    this.fst = fst;
    this.words = words;
    this.maxHorizontalContext = maxHorizontalContext;
}
/// <summary>
/// Iterates over the given token stream and adds the resulting terms to the index;
/// Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
/// Lucene <see cref="Documents.Field"/>.
/// Finally closes the token stream. Note that untokenized keywords can be added with this method via
/// <see cref="T:KeywordTokenStream{T}(ICollection{T})"/>, the Lucene <c>KeywordTokenizer</c> or similar utilities.
/// </summary>
/// <param name="fieldName"> a name to be associated with the text </param>
/// <param name="stream"> the token stream to retrieve tokens from. </param>
/// <param name="boost"> the boost factor for hits for this field </param>
/// <param name="positionIncrementGap"> the position increment gap if fields with the same name are added more than once </param>
/// <param name="offsetGap"> the offset gap if fields with the same name are added more than once </param>
/// <seealso cref="Documents.Field.Boost"/>
public virtual void AddField(string fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap)
{
    try
    {
        if (fieldName == null)
        {
            throw new ArgumentException("fieldName must not be null");
        }
        if (stream == null)
        {
            throw new ArgumentException("token stream must not be null");
        }
        if (boost <= 0.0f)
        {
            throw new ArgumentException("boost factor must be greater than 0.0");
        }
        int numTokens = 0;
        int numOverlapTokens = 0;
        int pos = -1;
        BytesRefHash terms;
        SliceByteStartArray sliceArray;
        long sumTotalTermFreq = 0;
        int offset = 0;
        if (fields.TryGetValue(fieldName, out Info info))
        {
            numTokens = info.numTokens;
            numOverlapTokens = info.numOverlapTokens;
            pos = info.lastPosition + positionIncrementGap;
            offset = info.lastOffset + offsetGap;
            terms = info.terms;
            boost *= info.boost;
            sliceArray = info.sliceArray;
            sumTotalTermFreq = info.sumTotalTermFreq;
        }
        else
        {
            sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
            terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
        }

        if (!fieldInfos.ContainsKey(fieldName))
        {
            fieldInfos[fieldName] = new FieldInfo(fieldName, true, fieldInfos.Count, false, false, false,
                this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                DocValuesType.NONE, DocValuesType.NONE, null);
        }
        ITermToBytesRefAttribute termAtt = stream.GetAttribute<ITermToBytesRefAttribute>();
        IPositionIncrementAttribute posIncrAttribute = stream.AddAttribute<IPositionIncrementAttribute>();
        IOffsetAttribute offsetAtt = stream.AddAttribute<IOffsetAttribute>();
        BytesRef @ref = termAtt.BytesRef;
        stream.Reset();
        while (stream.IncrementToken())
        {
            termAtt.FillBytesRef();
            // if (DEBUG) System.err.println("token='" + term + "'");
            numTokens++;
            int posIncr = posIncrAttribute.PositionIncrement;
            if (posIncr == 0)
            {
                numOverlapTokens++;
            }
            pos += posIncr;
            int ord = terms.Add(@ref);
            if (ord < 0)
            {
                ord = (-ord) - 1;
                postingsWriter.Reset(sliceArray.end[ord]);
            }
            else
            {
                sliceArray.start[ord] = postingsWriter.StartNewSlice();
            }
            sliceArray.freq[ord]++;
            sumTotalTermFreq++;
            if (!storeOffsets)
            {
                postingsWriter.WriteInt32(pos);
            }
            else
            {
                postingsWriter.WriteInt32(pos);
                postingsWriter.WriteInt32(offsetAtt.StartOffset + offset);
                postingsWriter.WriteInt32(offsetAtt.EndOffset + offset);
            }
            sliceArray.end[ord] = postingsWriter.CurrentOffset;
        }
        stream.End();

        // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
        if (numTokens > 0)
        {
            fields[fieldName] = new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.EndOffset + offset, sumTotalTermFreq);
            sortedFields = null; // invalidate sorted view, if any
        }
    }
    catch (Exception e) // can never happen
    {
        throw new Exception(e.ToString(), e);
    }
    finally
    {
        try
        {
            if (stream != null)
            {
                stream.Dispose();
            }
        }
        catch (IOException e2)
        {
            throw new Exception(e2.ToString(), e2);
        }
    }
}
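The ord handling in the tokenization loop above relies on the return-value convention of BytesRefHash.Add: a non-negative result is the ord assigned to a newly added term, while a negative result means the term was already present, with its existing ord encoded as (-result) - 1. A small illustrative sketch (the term value is made up):

int first = terms.Add(new BytesRef("foo"));   // >= 0: "foo" was new, this is its ord
int second = terms.Add(new BytesRef("foo"));  // < 0: "foo" is already in the hash
int existingOrd = (-second) - 1;              // recovers the ord assigned by the first call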
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestRandomSortedBytes()
{
    Directory dir = NewDirectory();
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing, we expect missing to be mapped to byte[]
        // by the impersonator, but we have to give it a chance to merge them to this
        cfg.SetMergePolicy(NewLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, cfg);
    int numDocs = AtLeast(100);
    BytesRefHash hash = new BytesRefHash();
    IDictionary<string, string> docToString = new Dictionary<string, string>();
    int maxLength = TestUtil.NextInt(Random(), 1, 50);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "" + i, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        doc.Add(new SortedDocValuesField("field", br));
        hash.Add(br);
        docToString["" + i] = @string;
        w.AddDocument(doc);
    }
    if (Rarely())
    {
        w.Commit();
    }
    int numDocsNoValue = AtLeast(10);
    for (int i = 0; i < numDocsNoValue; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "noValue", Field.Store.YES));
        w.AddDocument(doc);
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        BytesRef bytesRef = new BytesRef();
        hash.Add(bytesRef); // add empty value for the gaps
    }
    if (Rarely())
    {
        w.Commit();
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing, we expect missing to be mapped to byte[]
        // by the impersonator, but we have to give it a chance to merge them to this
        w.ForceMerge(1);
    }
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        string id = "" + i + numDocs;
        doc.Add(NewTextField("id", id, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        hash.Add(br);
        docToString[id] = @string;
        doc.Add(new SortedDocValuesField("field", br));
        w.AddDocument(doc);
    }
    w.Commit();
    IndexReader reader = w.Reader;
    SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
    int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    Assert.AreEqual(hash.Size(), docValues.ValueCount);
    for (int i = 0; i < hash.Size(); i++)
    {
        hash.Get(sort[i], expected);
        docValues.LookupOrd(i, actual);
        Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
        int ord = docValues.LookupTerm(expected);
        Assert.AreEqual(i, ord);
    }
    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
    ISet<KeyValuePair<string, string>> entrySet = docToString.EntrySet();
    foreach (KeyValuePair<string, string> entry in entrySet)
    {
        // pk lookup
        DocsEnum termDocsEnum = slowR.TermDocsEnum(new Term("id", entry.Key));
        int docId = termDocsEnum.NextDoc();
        expected = new BytesRef(entry.Value);
        docValues.Get(docId, actual);
        Assert.AreEqual(expected, actual);
    }
    reader.Dispose();
    w.Dispose();
    dir.Dispose();
}
public ParallelArraysTermCollector(ScoringRewrite<Q> outerInstance)
{
    this.outerInstance = outerInstance;
    terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
}
private void InitializeInstanceFields()
{
    pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
}
internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
{
    int mid; // LUCENENET: IDE0059: Remove unnecessary value assignment
    while (low <= high)
    {
        mid = (low + high).TripleShift(1);
        hash.Get(ords[mid], bytesRef);
        int cmp = comparer.Compare(bytesRef, b);
        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            return mid;
        }
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(comparer.Compare(bytesRef, b) != 0);
    }
    return -(low + 1);
}
internal int BinarySearch(BytesRef b, BytesRef bytesRef, int low, int high, BytesRefHash hash, int[] ords, IComparer<BytesRef> comparer)
{
    int mid = 0;
    while (low <= high)
    {
        mid = (int)((uint)(low + high) >> 1);
        hash.Get(ords[mid], bytesRef);
        int cmp = comparer.Compare(bytesRef, b);
        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            return mid;
        }
    }
    Debug.Assert(comparer.Compare(bytesRef, b) != 0);
    return -(low + 1);
}
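Both BinarySearch variants above compute the midpoint with an unsigned right shift (TripleShift appears to be the port's helper for Java's >>>) rather than (low + high) / 2, which guards against int overflow when low + high exceeds int.MaxValue. A small sketch of the difference, with values chosen only to force the overflow:

int low = 1500000000, high = 2000000000;
int naive = (low + high) / 2;              // low + high wraps around, so this is a negative "midpoint"
int safe = (int)((uint)(low + high) >> 1); // 1750000000, the intended midpoint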
internal void InitializeInstanceFields()
{
    terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
}