/// <summary>
/// Enumerates all terms greater/equal than <paramref name="lowerTerm"/>
/// but less/equal than <paramref name="upperTerm"/>.
///
/// If an endpoint is null, it is said to be "open". Either or both
/// endpoints may be open. Open endpoints may not be exclusive
/// (you can't select all but the first or last term without
/// explicitly specifying the term to exclude.)
/// </summary>
/// <param name="tenum"> TermsEnum to filter </param>
/// <param name="lowerTerm"> The term text at the lower end of the range </param>
/// <param name="upperTerm"> The term text at the upper end of the range </param>
/// <param name="includeLower"> If true, the <paramref name="lowerTerm"/> is included in the range. </param>
/// <param name="includeUpper"> If true, the <paramref name="upperTerm"/> is included in the range. </param>
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm, bool includeLower, bool includeUpper)
    : base(tenum)
{
    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (lowerTerm == null)
    {
        this.LowerBytesRef = new BytesRef();
        this.IncludeLower = true;
    }
    else
    {
        this.LowerBytesRef = lowerTerm;
        this.IncludeLower = includeLower;
    }

    if (upperTerm == null)
    {
        this.IncludeUpper = true;
        UpperBytesRef = null;
    }
    else
    {
        this.IncludeUpper = includeUpper;
        UpperBytesRef = upperTerm;
    }

    InitialSeekTerm = LowerBytesRef;
    TermComp = Comparator;
}
public BytesRef GetPayload()
{
    if (payloadLength <= 0)
    {
        return null; // no payload
    }

    if (needToLoadPayload)
    {
        // read payloads lazily
        if (payload == null)
        {
            payload = new BytesRef(payloadLength);
        }
        else
        {
            payload.Grow(payloadLength);
        }
        proxStream.ReadBytes(payload.Bytes, payload.Offset, payloadLength);
        payload.Length = payloadLength;
        needToLoadPayload = false;
    }
    return payload;
}
/// <summary>
/// Factory that creates a new <see cref="TermRangeFilter"/> using <see cref="string"/>s for term text.
/// </summary>
public static TermRangeFilter NewStringRange(string field, string lowerTerm, string upperTerm, bool includeLower, bool includeUpper)
{
    BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
    BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
    return new TermRangeFilter(field, lower, upper, includeLower, includeUpper);
}
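A minimal usage sketch of the factory above; the field name and date bounds are illustrative only, not from the source. Passing null for either endpoint leaves that end of the range open.

// Hypothetical usage: filter terms in ["2024-01-01", "2024-12-31"], inclusive on both ends.
TermRangeFilter filter = TermRangeFilter.NewStringRange(
    "modified",        // field to range over (illustrative name)
    "2024-01-01",      // lowerTerm; null would leave the lower end open
    "2024-12-31",      // upperTerm; null would leave the upper end open
    includeLower: true,
    includeUpper: true);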
public MockVariableLengthPayloadFilter(Random random, TokenStream @in)
    : base(@in)
{
    this.Random = random;
    this.Payload = new BytesRef(Bytes);
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
}
/// <summary> /// If <paramref name="key"/> exists, returns its ordinal, else /// returns <c>-insertionPoint-1</c>, like /// <see cref="System.Array.BinarySearch(System.Array, int, int, object)"/> /// </summary> /// <param name="key"> Key to look up</param> public virtual int LookupTerm(BytesRef key) { BytesRef spare = new BytesRef(); int low = 0; int high = ValueCount - 1; while (low <= high) { int mid = (int)((uint)(low + high) >> 1); LookupOrd(mid, spare); int cmp = spare.CompareTo(key); if (cmp < 0) { low = mid + 1; } else if (cmp > 0) { high = mid - 1; } else { return(mid); // key found } } return(-(low + 1)); // key not found. }
public virtual void AddValue(int docID, BytesRef value)
{
    if (value == null)
    {
        throw new System.ArgumentException("field \"" + FieldInfo.Name + "\": null value not allowed");
    }
    if (value.Length > (ByteBlockPool.BYTE_BLOCK_SIZE - 2))
    {
        throw new System.ArgumentException("DocValuesField \"" + FieldInfo.Name + "\" is too large, must be <= " + (ByteBlockPool.BYTE_BLOCK_SIZE - 2));
    }

    if (docID != CurrentDoc)
    {
        FinishCurrentDoc();
    }

    // Fill in any holes:
    while (CurrentDoc < docID)
    {
        PendingCounts.Add(0); // no values
        CurrentDoc++;
    }

    AddOneValue(value);
    UpdateBytesUsed();
}
public override void Get(int docID, Int32sRef ordinals)
{
    BytesRef bytes = new BytesRef();
    values.Get(docID, bytes);
    outerInstance.Decode(bytes, ordinals);
}
private void SumValues(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    //System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
    foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
    {
        BinaryDocValues dv = hits.context.AtomicReader.GetBinaryDocValues(IndexFieldName);
        if (dv == null) // this reader does not have DocValues for the requested category list
        {
            continue;
        }

        DocIdSetIterator docs = hits.bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            //System.out.println("  doc=" + doc);
            // TODO: use OrdinalsReader? we'd need to add a
            // BytesRef getAssociation()?
            BytesRef bytesRef = new BytesRef();
            dv.Get(doc, bytesRef);
            byte[] bytes = bytesRef.Bytes;
            int end = bytesRef.Offset + bytesRef.Length;
            int offset = bytesRef.Offset;
            while (offset < end)
            {
                // each entry is a pair of big-endian int32s: the ordinal, then the raw float bits
                int ord = ((bytes[offset] & 0xFF) << 24) |
                          ((bytes[offset + 1] & 0xFF) << 16) |
                          ((bytes[offset + 2] & 0xFF) << 8) |
                          (bytes[offset + 3] & 0xFF);
                offset += 4;
                int value = ((bytes[offset] & 0xFF) << 24) |
                            ((bytes[offset + 1] & 0xFF) << 16) |
                            ((bytes[offset + 2] & 0xFF) << 8) |
                            (bytes[offset + 3] & 0xFF);
                offset += 4;
                values[ord] += Number.IntBitsToFloat(value);
            }
        }
    }
}
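For reference, a minimal sketch of the write side of the layout the loop above decodes: 4 big-endian bytes of ordinal followed by 4 big-endian bytes of IEEE-754 float bits. The helper name is hypothetical; only the byte layout is taken from the decoder.

// Hypothetical encoder matching the decode loop above.
// Requires: using System; (for BitConverter)
static void WriteOrdAndValue(byte[] buf, int offset, int ord, float value)
{
    int bits = BitConverter.ToInt32(BitConverter.GetBytes(value), 0); // raw float bits
    buf[offset]     = (byte)(ord >> 24);
    buf[offset + 1] = (byte)(ord >> 16);
    buf[offset + 2] = (byte)(ord >> 8);
    buf[offset + 3] = (byte)ord;
    buf[offset + 4] = (byte)(bits >> 24);
    buf[offset + 5] = (byte)(bits >> 16);
    buf[offset + 6] = (byte)(bits >> 8);
    buf[offset + 7] = (byte)bits;
}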
public virtual void TestFixedBinary()
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

    Document doc = new Document();
    var bytes = new byte[4];
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        bytes[0] = (byte)(i >> 24);
        bytes[1] = (byte)(i >> 16);
        bytes[2] = (byte)(i >> 8);
        bytes[3] = (byte)i;
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();

    DirectoryReader r = DirectoryReader.Open(dir);
    int expectedValue = 0;
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        BytesRef scratch = new BytesRef();
        BinaryDocValues dv = reader.GetBinaryDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            bytes[0] = (byte)(expectedValue >> 24);
            bytes[1] = (byte)(expectedValue >> 16);
            bytes[2] = (byte)(expectedValue >> 8);
            bytes[3] = (byte)expectedValue;
            dv.Get(i, scratch);
            Assert.AreEqual(data, scratch);
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
// like Clear() but doesn't clear termBuffer/text
private void ClearNoTermBuffer()
{
    payload = null;
    positionIncrement = 1;
    flags = 0;
    startOffset = endOffset = 0;
    type = Tokenattributes.TypeAttribute_Fields.DEFAULT_TYPE;
}
private DocTermOrdsRangeFilter(string field, BytesRef lowerVal, BytesRef upperVal, bool includeLower, bool includeUpper)
{
    this.Field_Renamed = field;
    this.LowerVal_Renamed = lowerVal;
    this.UpperVal_Renamed = upperVal;
    this.IncludeLower = includeLower;
    this.IncludeUpper = includeUpper;
}
/// <summary>
/// Resets the term text, payload, flags, positionIncrement,
/// startOffset, endOffset, and token type to default.
/// </summary>
public override void Clear()
{
    base.Clear();
    payload = null;
    positionIncrement = 1;
    flags = 0;
    startOffset = endOffset = 0;
    type = Tokenattributes.TypeAttribute_Fields.DEFAULT_TYPE;
}
/// <summary> /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. </summary> /// <param name="prototype"> existing Token </param> /// <param name="newTerm"> new term text </param> public virtual void Reinit(Token prototype, string newTerm) { SetEmpty().Append(newTerm); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; endOffset = prototype.endOffset; type = prototype.type; payload = prototype.payload; }
public BinaryDocValuesFieldUpdates(string field, int maxDoc)
    : base(field, Type_e.BINARY)
{
    DocsWithField = new FixedBitSet(64);
    Docs = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT);
    Offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
    Lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
    Values = new BytesRef(16); // start small
    Size = 0;
}
/// <summary>
/// Copy the prototype token's fields into this one. Note: Payloads are shared. </summary>
/// <param name="prototype"> source Token to copy fields from </param>
public virtual void Reinit(Token prototype)
{
    CopyBuffer(prototype.Buffer(), 0, prototype.Length);
    positionIncrement = prototype.positionIncrement;
    flags = prototype.flags;
    startOffset = prototype.startOffset;
    endOffset = prototype.endOffset;
    type = prototype.type;
    payload = prototype.payload;
}
/// <summary> /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. </summary> /// <param name="prototype"> existing Token </param> /// <param name="newTermBuffer"> buffer containing new term text </param> /// <param name="offset"> the index in the buffer of the first character </param> /// <param name="length"> number of valid characters in the buffer </param> public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length) { CopyBuffer(newTermBuffer, offset, length); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; endOffset = prototype.endOffset; type = prototype.type; payload = prototype.payload; }
protected internal override AcceptStatus Accept(BytesRef term)
{
    if (StringHelper.StartsWith(term, PrefixRef))
    {
        return AcceptStatus.YES;
    }
    else
    {
        return AcceptStatus.END;
    }
}
private void ReadVectors()
{
    termAndPostings = new TermAndPostings[numTerms];
    BytesRef lastTerm = new BytesRef();
    for (int i = 0; i < numTerms; i++)
    {
        TermAndPostings t = new TermAndPostings();
        BytesRef term = new BytesRef();
        term.CopyBytes(lastTerm);
        int start = tvf.ReadVInt32();
        int deltaLen = tvf.ReadVInt32();
        term.Length = start + deltaLen;
        term.Grow(term.Length);
        tvf.ReadBytes(term.Bytes, start, deltaLen);
        t.Term = term;

        int freq = tvf.ReadVInt32();
        t.Freq = freq;

        if (storePositions)
        {
            int[] positions = new int[freq];
            int pos = 0;
            for (int posUpto = 0; posUpto < freq; posUpto++)
            {
                int delta = tvf.ReadVInt32();
                if (delta == -1)
                {
                    delta = 0; // LUCENE-1542 correction
                }
                pos += delta;
                positions[posUpto] = pos;
            }
            t.Positions = positions;
        }

        if (storeOffsets)
        {
            int[] startOffsets = new int[freq];
            int[] endOffsets = new int[freq];
            int offset = 0;
            for (int posUpto = 0; posUpto < freq; posUpto++)
            {
                startOffsets[posUpto] = offset + tvf.ReadVInt32();
                offset = endOffsets[posUpto] = startOffsets[posUpto] + tvf.ReadVInt32();
            }
            t.StartOffsets = startOffsets;
            t.EndOffsets = endOffsets;
        }

        lastTerm.CopyBytes(term);
        termAndPostings[i] = t;
    }
}
/// <summary>
/// Shorthand for calling <see cref="Clear"/>,
/// <see cref="CopyBuffer(char[], int, int)"/>,
/// <see cref="SetOffset"/>,
/// <see cref="SetType"/> </summary>
/// <returns> this Token instance </returns>
public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, string newType)
{
    CheckOffsets(newStartOffset, newEndOffset);
    ClearNoTermBuffer();
    CopyBuffer(newTermBuffer, newTermOffset, newTermLength);
    payload = null;
    positionIncrement = 1;
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = newType;
    return this;
}
public MockFixedLengthPayloadFilter(Random random, TokenStream @in, int length)
    : base(@in)
{
    if (length < 0)
    {
        throw new System.ArgumentException("length must be >= 0");
    }
    this.Random = random;
    this.Bytes = new byte[length];
    this.Payload = new BytesRef(Bytes);
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    IList<long?> numbers = new List<long?>();
    IList<BytesRef> binary = new List<BytesRef>();
    IList<BytesRef> sorted = new List<BytesRef>();
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Document d = new Document();
        long number = Random().NextLong();
        d.Add(new NumericDocValuesField("number", number));
        BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
        d.Add(new BinaryDocValuesField("bytes", bytes));
        binary.Add(bytes);
        bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
        d.Add(new SortedDocValuesField("sorted", bytes));
        sorted.Add(bytes);
        w.AddDocument(d);
        numbers.Add(number);
    }

    w.ForceMerge(1);
    IndexReader r = w.Reader;
    w.Dispose();

    Assert.AreEqual(1, r.Leaves.Count);
    AtomicReader ar = (AtomicReader)r.Leaves[0].Reader;

    int numThreads = TestUtil.NextInt(Random(), 2, 5);
    IList<ThreadClass> threads = new List<ThreadClass>();
    CountDownLatch startingGun = new CountDownLatch(1);
    for (int t = 0; t < numThreads; t++)
    {
        Random threadRandom = new Random(Random().Next());
        ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom);
        thread.Start();
        threads.Add(thread);
    }

    startingGun.countDown();

    foreach (ThreadClass thread in threads)
    {
        thread.Join();
    }

    r.Dispose();
    dir.Dispose();
}
public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
{
    Terms terms = reader.Terms(FieldName);
    if (terms != null)
    {
        TermsEnum te = terms.Iterator(null);
        if (te.SeekExact(bytes))
        {
            return te.DocsAndPositions(liveDocs, null);
        }
    }
    return null;
}
public override void Get(int docID, BytesRef result)
{
    int ord = GetOrd(docID);
    if (ord == -1)
    {
        // missing value for this doc: return the empty BytesRef
        result.Bytes = BytesRef.EMPTY_BYTES;
        result.Length = 0;
        result.Offset = 0;
    }
    else
    {
        LookupOrd(ord, result);
    }
}
/// <summary>
/// Creates this from <paramref name="dim"/> and <paramref name="path"/> and an
/// association
/// </summary>
public AssociationFacetField(BytesRef assoc, string dim, params string[] path)
    : base("dummy", TYPE)
{
    FacetField.VerifyLabel(dim);
    foreach (string label in path)
    {
        FacetField.VerifyLabel(label);
    }
    this.Dim = dim;
    this.Assoc = assoc;
    if (path.Length == 0)
    {
        throw new System.ArgumentException("path must have at least one element");
    }
    this.Path = path;
}
public virtual void TestBinary()
{
    Directory dir = NewDirectory();
    Document doc = new Document();
    BytesRef @ref = new BytesRef();
    Field field = new BinaryDocValuesField("bytes", @ref);
    doc.Add(field);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
        iw.AddDocument(doc);
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }

    DirectoryReader ir = iw.Reader;
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes");
    BinaryDocValues single = merged.GetBinaryDocValues("bytes");
    BytesRef actual = new BytesRef();
    BytesRef expected = new BytesRef();
    for (int i = 0; i < numDocs; i++)
    {
        single.Get(i, expected);
        multi.Get(i, actual);
        Assert.AreEqual(expected, actual);
    }

    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
// NOTE: slow! (linear scan)
public override SeekStatus SeekCeil(BytesRef text)
{
    IComparer<BytesRef> comparer = Comparer;
    for (int i = 0; i < numTerms; i++)
    {
        int cmp = comparer.Compare(text, termAndPostings[i].Term);
        if (cmp < 0)
        {
            currentTerm = i;
            return SeekStatus.NOT_FOUND;
        }
        else if (cmp == 0)
        {
            currentTerm = i;
            return SeekStatus.FOUND;
        }
    }
    currentTerm = termAndPostings.Length;
    return SeekStatus.END;
}
protected internal virtual void ProcessPayload(Similarity similarity)
{
    if (TermSpans.PayloadAvailable)
    {
        DocsAndPositionsEnum postings = TermSpans.Postings;
        Payload = postings.Payload;
        if (Payload != null)
        {
            PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(
                Doc, OuterInstance.OuterInstance.Term.Field(),
                Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed,
                DocScorer.ComputePayloadFactor(Doc, Spans.Start(), Spans.End(), Payload));
        }
        else
        {
            PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(
                Doc, OuterInstance.OuterInstance.Term.Field(),
                Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, 1F);
        }
        PayloadsSeen++;
    }
    else
    {
        // zero out the payload?
    }
}
protected internal virtual void ProcessPayload(Similarity similarity)
{
    if (termSpans.IsPayloadAvailable)
    {
        DocsAndPositionsEnum postings = termSpans.Postings;
        m_payload = postings.GetPayload();
        if (m_payload != null)
        {
            m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(
                m_doc, outerInstance.outerInstance.Term.Field,
                m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore,
                m_docScorer.ComputePayloadFactor(m_doc, m_spans.Start, m_spans.End, m_payload));
        }
        else
        {
            m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(
                m_doc, outerInstance.outerInstance.Term.Field,
                m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore, 1F);
        }
        m_payloadsSeen++;
    }
    else
    {
        // zero out the payload?
    }
}
public virtual void TestBinary()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
    BytesRef bytes = new BytesRef(2);
    BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);

    for (int i = 0; i < 256; i++)
    {
        bytes.Bytes[0] = (byte)i;
        bytes.Bytes[1] = unchecked((byte)(255 - i));
        bytes.Length = 2;
        Document doc = new Document();
        FieldType customType = new FieldType();
        customType.Stored = true;
        doc.Add(new Field("id", "" + i, customType));
        doc.Add(new TextField("bytes", tokenStream));
        iw.AddDocument(doc);
    }

    IndexReader ir = iw.Reader;
    iw.Dispose();

    IndexSearcher @is = NewSearcher(ir);

    for (int i = 0; i < 256; i++)
    {
        bytes.Bytes[0] = (byte)i;
        bytes.Bytes[1] = unchecked((byte)(255 - i));
        bytes.Length = 2;
        TopDocs docs = @is.Search(new TermQuery(new Term("bytes", bytes)), 5);
        Assert.AreEqual(1, docs.TotalHits);
        Assert.AreEqual("" + i, @is.Doc(docs.ScoreDocs[0].Doc).Get("id"));
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Subclass & override if you change the encoding.
/// </summary>
protected virtual void Decode(BytesRef buf, Int32sRef ordinals)
{
    // grow the buffer up front, even if by a large number of values (buf.Length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (ordinals.Int32s.Length < buf.Length)
    {
        ordinals.Int32s = ArrayUtil.Grow(ordinals.Int32s, buf.Length);
    }

    ordinals.Offset = 0;
    ordinals.Length = 0;

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.Offset + buf.Length;
    int value = 0;
    int offset = buf.Offset;
    int prev = 0;
    while (offset < upto)
    {
        byte b = buf.Bytes[offset++];
        if ((sbyte)b >= 0)
        {
            // last byte of a VInt (high bit clear): the decoded value is a
            // delta against the previously decoded ordinal
            ordinals.Int32s[ordinals.Length] = ((value << 7) | b) + prev;
            value = 0;
            prev = ordinals.Int32s[ordinals.Length];
            ordinals.Length++;
        }
        else
        {
            value = (value << 7) | (b & 0x7F);
        }
    }
}
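A hedged sketch of the matching write side, derived only from what Decode above consumes: ordinals are delta-coded against the previous ordinal (so they are assumed sorted ascending), and each delta is written most-significant 7-bit group first, with the high bit set on every byte except the last. The method name and use of List<byte> are illustrative.

// Hypothetical encoder producing input that Decode above can read.
// Requires: using System.Collections.Generic;
static byte[] Encode(int[] ords, int count)
{
    var bytes = new List<byte>();
    int prev = 0;
    for (int i = 0; i < count; i++)
    {
        int delta = ords[i] - prev; // assumes ords sorted ascending, so delta >= 0
        prev = ords[i];

        // find the highest non-empty 7-bit group
        int shift = 28;
        while (shift > 0 && (delta >> shift) == 0)
        {
            shift -= 7;
        }
        // emit continuation bytes (high bit set), most significant group first
        while (shift > 0)
        {
            bytes.Add((byte)(((delta >> shift) & 0x7F) | 0x80));
            shift -= 7;
        }
        bytes.Add((byte)(delta & 0x7F)); // final byte: high bit clear
    }
    return bytes.ToArray();
}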
public override void Clear()
{
    MaxNonCompetitiveBoost_Renamed = float.NegativeInfinity;
    CompetitiveTerm_Renamed = null;
}
public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
{
    Debug.Assert(offset + length <= originalLength);
    if (bytes.Bytes.Length < originalLength)
    {
        bytes.Bytes = new byte[ArrayUtil.Oversize(originalLength, 1)];
    }
    @in.ReadBytes(bytes.Bytes, 0, offset + length);
    bytes.Offset = offset;
    bytes.Length = length;
}
internal Iterator(int size, PagedGrowableWriter offsets, PagedGrowableWriter lengths, PagedMutable docs, BytesRef values, FixedBitSet docsWithField)
{
    this.Offsets = offsets;
    this.Size = size;
    this.Lengths = lengths;
    this.Docs = docs;
    this.DocsWithField = docsWithField;
    Value_Renamed = (BytesRef)values.Clone();
}
public override float ComputePayloadFactor(int doc, int start, int end, BytesRef payload)
{
    return 1f;
}
public override void Get(int docID, BytesRef result)
{
    result.Bytes = BytesRef.EMPTY_BYTES;
    result.Offset = 0;
    result.Length = 0;
}
/// <summary> /// Constructs a filter for field <paramref name="fieldName"/> matching /// greater than or equal to <paramref name="lowerTerm"/>. /// </summary> public static TermRangeFilter More(string fieldName, BytesRef lowerTerm) { return(new TermRangeFilter(fieldName, lowerTerm, null, true, false)); }
internal DumbPrefixQuery(TestPrefixRandom outerInstance, Term term)
    : base(term.Field)
{
    this.OuterInstance = outerInstance;
    Prefix = term.Bytes;
}
public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(scheduler)
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    IndexWriter w = new IndexWriter(dir, config);

    Document doc = new Document();
    var bytes = new byte[4];
    ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        encoder.Reset(bytes);
        encoder.WriteVInt(i % 65535); // 1, 2, or 3 bytes
        data.Length = encoder.Position;
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();

    DirectoryReader r = DirectoryReader.Open(dir);
    int expectedValue = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        BytesRef scratch = new BytesRef(bytes);
        BinaryDocValues dv = reader.GetBinaryDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            dv.Get(i, scratch);
            input.Reset((byte[])(Array)scratch.Bytes, scratch.Offset, scratch.Length);
            Assert.AreEqual(expectedValue % 65535, input.ReadVInt());
            Assert.IsTrue(input.Eof());
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary> /// Constructs a filter for field <paramref name="fieldName"/> matching /// less than or equal to <paramref name="upperTerm"/>. /// </summary> public static TermRangeFilter Less(string fieldName, BytesRef upperTerm) { return(new TermRangeFilter(fieldName, null, upperTerm, false, true)); }
/// <param name="fieldName"> The field this range applies to </param> /// <param name="lowerTerm"> The lower bound on this range </param> /// <param name="upperTerm"> The upper bound on this range </param> /// <param name="includeLower"> Does this range include the lower bound? </param> /// <param name="includeUpper"> Does this range include the upper bound? </param> /// <exception cref="System.ArgumentException"> if both terms are <c>null</c> or if /// lowerTerm is <c>null</c> and includeLower is <c>true</c> (similar for upperTerm /// and includeUpper) </exception> public TermRangeFilter(string fieldName, BytesRef lowerTerm, BytesRef upperTerm, bool includeLower, bool includeUpper) : base(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper)) { }
public override void StartTerm(BytesRef term, int freq)
{
    // terms are prefix-compressed against the previously written term
    int prefix = StringHelper.BytesDifference(LastTerm, term);
    int suffix = term.Length - prefix;
    Tvf.WriteVInt(prefix);
    Tvf.WriteVInt(suffix);
    Tvf.WriteBytes(term.Bytes, term.Offset + prefix, suffix);
    Tvf.WriteVInt(freq);
    LastTerm.CopyBytes(term);
    LastPosition = LastOffset = 0;

    if (Offsets && Positions)
    {
        // we might need to buffer if it's a non-bulk merge
        OffsetStartBuffer = ArrayUtil.Grow(OffsetStartBuffer, freq);
        OffsetEndBuffer = ArrayUtil.Grow(OffsetEndBuffer, freq);
        OffsetIndex = 0;
        OffsetFreq = freq;
    }
}
public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
{
    Debug.Assert(payload == null);
    if (Positions && Offsets)
    {
        // write position delta
        Tvf.WriteVInt(position - LastPosition);
        LastPosition = position;

        // buffer offsets
        OffsetStartBuffer[OffsetIndex] = startOffset;
        OffsetEndBuffer[OffsetIndex] = endOffset;
        OffsetIndex++;

        // dump buffer if we are done
        if (OffsetIndex == OffsetFreq)
        {
            for (int i = 0; i < OffsetIndex; i++)
            {
                Tvf.WriteVInt(OffsetStartBuffer[i] - LastOffset);
                Tvf.WriteVInt(OffsetEndBuffer[i] - OffsetStartBuffer[i]);
                LastOffset = OffsetEndBuffer[i];
            }
        }
    }
    else if (Positions)
    {
        // write position delta
        Tvf.WriteVInt(position - LastPosition);
        LastPosition = position;
    }
    else if (Offsets)
    {
        // write offset deltas
        Tvf.WriteVInt(startOffset - LastOffset);
        Tvf.WriteVInt(endOffset - startOffset);
        LastOffset = endOffset;
    }
}
/// <summary>
/// Creates a BytesRef range filter using <see cref="IFieldCache.GetTermsIndex"/>. This works with all
/// fields containing zero or one term in the field. The range can be half-open by setting one
/// of the values to <c>null</c>.
/// </summary>
public static DocTermOrdsRangeFilter NewBytesRefRange(string field, BytesRef lowerVal, BytesRef upperVal, bool includeLower, bool includeUpper)
{
    return new DocTermOrdsRangeFilterAnonymousInnerClassHelper(field, lowerVal, upperVal, includeLower, includeUpper);
}
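A minimal usage sketch of this factory; the field name and bounds are illustrative only. Passing null for lowerVal or upperVal leaves that end open, as the summary above notes.

// Hypothetical usage: match docs whose "color" term is in ["blue", "red"),
// i.e. lower bound inclusive, upper bound exclusive.
DocTermOrdsRangeFilter filter = DocTermOrdsRangeFilter.NewBytesRefRange(
    "color",
    new BytesRef("blue"),  // lowerVal; null would leave this end open
    new BytesRef("red"),   // upperVal; null would leave this end open
    includeLower: true,
    includeUpper: false);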
internal SimplePrefixTermsEnum(TestPrefixRandom.DumbPrefixQuery outerInstance, TermsEnum tenum, BytesRef prefix)
    : base(tenum)
{
    this.OuterInstance = outerInstance;
    this.Prefix = prefix;
    InitialSeekTerm = new BytesRef("");
}
public DocTermOrdsRangeFilterAnonymousInnerClassHelper(string field, BytesRef lowerVal, BytesRef upperVal, bool includeLower, bool includeUpper)
    : base(field, lowerVal, upperVal, includeLower, includeUpper)
{
    this.Field = field;
    this.LowerVal = lowerVal;
    this.UpperVal = upperVal;
    this.IncludeLower = includeLower;
    this.IncludeUpper = includeUpper;
}
protected internal override AcceptStatus Accept(BytesRef term)
{
    return StringHelper.StartsWith(term, Prefix) ? AcceptStatus.YES : AcceptStatus.NO;
}
public override void LookupOrd(int ord, BytesRef result)
{
    result.Bytes = BytesRef.EMPTY_BYTES;
    result.Offset = 0;
    result.Length = 0;
}
public PrefixTermsEnum(TermsEnum tenum, BytesRef prefixText)
    : base(tenum)
{
    InitialSeekTerm = this.PrefixRef = prefixText;
}
public override void LookupOrd(long ord, BytesRef result)
{
    throw new System.IndexOutOfRangeException();
}
public override float ScorePayload(int docId, int start, int end, BytesRef payload)
{
    // we know it is size 4 here, so ignore the offset/length
    return payload.Bytes[payload.Offset];
}
/// <summary>
/// Retrieves the value for the specified ordinal. </summary>
/// <param name="ord"> ordinal to lookup (must be &gt;= 0 and &lt; <see cref="ValueCount"/>) </param>
/// <param name="result"> will be populated with the ordinal's value </param>
/// <seealso cref="GetOrd(int)"/>
public abstract void LookupOrd(int ord, BytesRef result);
public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(offset + length <= originalLength);
    }
    // add 7 padding bytes, this is not necessary but can help decompression run faster
    if (bytes.Bytes.Length < originalLength + 7)
    {
        bytes.Bytes = new byte[ArrayUtil.Oversize(originalLength + 7, 1)];
    }
    int decompressedLength = LZ4.Decompress(@in, offset + length, bytes.Bytes, 0);
    if (decompressedLength > originalLength)
    {
        throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength + " (resource=" + @in + ")");
    }
    bytes.Offset = offset;
    bytes.Length = length;
}
/// <summary>
/// Calculate a scoring factor based on the data in the payload. </summary>
public abstract float ComputePayloadFactor(int doc, int start, int end, BytesRef payload);
public override void Decompress(DataInput input, int originalLength, int offset, int length, BytesRef bytes)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(offset + length <= originalLength);
    }
    if (length == 0)
    {
        bytes.Length = 0;
        return;
    }

    byte[] compressedBytes = new byte[input.ReadVInt32()];
    input.ReadBytes(compressedBytes, 0, compressedBytes.Length);
    byte[] decompressedBytes = null;

    using (MemoryStream decompressedStream = new MemoryStream())
    {
        using (MemoryStream compressedStream = new MemoryStream(compressedBytes))
        {
            using (DeflateStream dStream = new DeflateStream(compressedStream, System.IO.Compression.CompressionMode.Decompress))
            {
                dStream.CopyTo(decompressedStream);
            }
        }
        decompressedBytes = decompressedStream.ToArray();
    }

    if (decompressedBytes.Length != originalLength)
    {
        throw new CorruptIndexException("Length mismatch: " + decompressedBytes.Length + " != " + originalLength + " (resource=" + input + ")");
    }

    bytes.Bytes = decompressedBytes;
    bytes.Offset = offset;
    bytes.Length = length;
}
/// <summary>
/// Decompress bytes that were stored between offsets <paramref name="offset"/> and
/// <c>offset+length</c> in the original stream from the compressed
/// stream <paramref name="in"/> to <paramref name="bytes"/>. After returning, the length
/// of <paramref name="bytes"/> (<c>bytes.Length</c>) must be equal to
/// <paramref name="length"/>. Implementations of this method are free to resize
/// <paramref name="bytes"/> depending on their needs.
/// </summary>
/// <param name="in"> The input that stores the compressed stream. </param>
/// <param name="originalLength"> The length of the original data (before compression). </param>
/// <param name="offset"> Bytes before this offset do not need to be decompressed. </param>
/// <param name="length"> Bytes after <c>offset+length</c> do not need to be decompressed. </param>
/// <param name="bytes"> a <see cref="BytesRef"/> where to store the decompressed data. </param>
public abstract void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes);
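To illustrate the contract, a minimal pass-through implementation that assumes the "compressed" stream is actually stored raw; it parallels the direct (no-compression) Decompress shown earlier and exists only to show how the offset/length fields must be left.

// Hypothetical pass-through decompressor: the stream is assumed stored
// uncompressed, so "decompression" is a plain sequential read.
public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
{
    if (bytes.Bytes.Length < originalLength)
    {
        bytes.Bytes = new byte[ArrayUtil.Oversize(originalLength, 1)];
    }
    // only the first offset+length bytes are needed by the caller
    @in.ReadBytes(bytes.Bytes, 0, offset + length);
    bytes.Offset = offset; // caller starts reading here...
    bytes.Length = length; // ...for exactly 'length' bytes, per the contract
}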
public virtual void TestLengthPrefixAcrossTwoPages()
{
    Directory d = NewDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    Document doc = new Document();
    var bytes = new byte[32764];
    BytesRef b = new BytesRef();
    b.Bytes = bytes;
    b.Length = bytes.Length;
    doc.Add(new SortedDocValuesField("field", b));
    w.AddDocument(doc);
    bytes[0] = 1;
    w.AddDocument(doc);
    w.ForceMerge(1);

    DirectoryReader r = w.Reader;
    BinaryDocValues s = FieldCache.DEFAULT.GetTerms(GetOnlySegmentReader(r), "field", false);

    BytesRef bytes1 = new BytesRef();
    s.Get(0, bytes1);
    Assert.AreEqual(bytes.Length, bytes1.Length);
    bytes[0] = 0;
    Assert.AreEqual(b, bytes1);

    s.Get(1, bytes1);
    Assert.AreEqual(bytes.Length, bytes1.Length);
    bytes[0] = 1;
    Assert.AreEqual(b, bytes1);

    r.Dispose();
    w.Dispose();
    d.Dispose();
}
public virtual void TestTooLargeTermSortedSetBytes()
{
    AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
    Analyzer analyzer = new MockAnalyzer(Random());

    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    IndexWriter iwriter = new IndexWriter(directory, iwc);

    Document doc = new Document();
    byte[] bytes = new byte[100000];
    BytesRef b = new BytesRef(bytes);
    Random().NextBytes((byte[])(Array)bytes);
    doc.Add(new SortedSetDocValuesField("dv", b));
    try
    {
        iwriter.AddDocument(doc);
        Assert.Fail("did not get expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }
    iwriter.Dispose();
    directory.Dispose();
}
/// <summary>
/// Looks up the value for the specified document. </summary>
public abstract void Get(int docID, BytesRef result);