/// <summary>
/// Returns a copy of this buffer whose term bytes are independent of the original.
/// </summary>
public object Clone()
{
    // LUCENENET: MemberwiseClone() doesn't throw in .NET
    var copy = (TermBuffer)base.MemberwiseClone();
    copy.bytes = BytesRef.DeepCopyOf(bytes);
    return copy;
}
/// <summary>
/// Materializes this buffer as a <see cref="Term"/>, caching the result.
/// Returns <c>null</c> while the field is still unset.
/// </summary>
public Term ToTerm()
{
    if (field is null)
    {
        return null; // unset
    }
    if (term != null)
    {
        return term; // already materialized
    }
    term = new Term(field, BytesRef.DeepCopyOf(bytes));
    return term;
}
/// <summary>
/// Returns a copy of this buffer whose term bytes are independent of the original.
/// </summary>
public object Clone()
{
    // LUCENENET: MemberwiseClone() doesn't throw in .NET, so the old
    // try/catch is unnecessary. The previous version also swallowed the
    // exception in an empty catch and then dereferenced a null 'clone',
    // which would have produced a NullReferenceException.
    TermBuffer clone = (TermBuffer)base.MemberwiseClone();
    clone.bytes = BytesRef.DeepCopyOf(bytes);
    return clone;
}
/// <summary>
/// Round-trips every valid Unicode code point (skipping the surrogate block)
/// through UTF16toUTF8 and UTF8toUTF16, cross-checking the encoded bytes
/// against .NET's own UTF-8 encoder.
/// </summary>
public virtual void TestAllUnicodeChars()
{
    BytesRef utf8 = new BytesRef(10);
    CharsRef utf16 = new CharsRef(10);
    char[] chars = new char[2];
    for (int ch = 0; ch < 0x0010FFFF; ch++)
    {
        if (ch == 0xd800) // Skip invalid code points
        {
            // jump past the surrogate range U+D800..U+DFFF
            ch = 0xe000;
        }
        int len = 0;
        if (ch <= 0xffff)
        {
            // BMP code point: a single UTF-16 code unit
            chars[len++] = (char)ch;
        }
        else
        {
            // supplementary code point: encode as a high/low surrogate pair
            chars[len++] = (char)(((ch - 0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START);
            chars[len++] = (char)(((ch - 0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START);
        }
        UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8);

        string s1 = new string(chars, 0, len);
        string s2 = Encoding.UTF8.GetString(utf8.Bytes, utf8.Offset, utf8.Length);
        Assert.AreEqual(s1, s2, "codepoint " + ch);

        UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
        Assert.AreEqual(s1, new string(utf16.Chars, 0, utf16.Length), "codepoint " + ch);

        // byte-for-byte comparison against the framework encoder
        var b = s1.GetBytes(Encoding.UTF8);
        Assert.AreEqual(utf8.Length, b.Length);
        for (int j = 0; j < utf8.Length; j++)
        {
            Assert.AreEqual(utf8.Bytes[j], b[j]);
        }
    }
}
/// <summary>
/// Retrieves the value for <paramref name="docID"/> by delegating to the
/// <see cref="long"/>-based overload.
/// </summary>
public override sealed void Get(int docID, BytesRef result) => Get((long)docID, result);
/// <summary>
/// Verifies that the terms of field "f" enumerate in strictly increasing order,
/// that every enumerated term was one of the added terms, and that each seen
/// term can subsequently be found again via <c>SeekCeil</c>.
/// </summary>
private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
{
    TermsEnum terms = MultiFields.GetFields(r).Terms("f").Iterator(null);

    BytesRef last = new BytesRef();

    HashSet<string> seenTerms = new HashSet<string>();

    while (true)
    {
        BytesRef term = terms.Next();
        if (term == null)
        {
            break;
        }

        // each term must sort strictly after the previous one
        Assert.IsTrue(last.CompareTo(term) < 0);
        last.CopyBytes(term);

        string s = term.Utf8ToString();
        Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
        seenTerms.Add(s);
    }

    // only the top-level reader is guaranteed to see every added term
    if (isTop)
    {
        Assert.IsTrue(allTerms.SetEquals(seenTerms));
    }

    // Test seeking:
    IEnumerator<string> it = seenTerms.GetEnumerator();
    while (it.MoveNext())
    {
        BytesRef tr = new BytesRef(it.Current);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
    }
}
/// <summary>
/// Adds one document exercising every field flavor checked by this back-compat
/// test: stored/indexed text, term vectors (with positions/offsets), a field
/// with a non-ASCII name, trie-encoded numerics, and one of each doc-values type.
/// </summary>
private void AddDoc(IndexWriter writer, int id)
{
    Document doc = new Document();
    doc.Add(new TextField("content", "aaa", Field.Store.NO));
    doc.Add(new StringField("id", Convert.ToString(id), Field.Store.YES));
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    customType2.StoreTermVectorPositions = true;
    customType2.StoreTermVectorOffsets = true;
    // the values below include surrogate pairs, U+0000 and other non-ASCII chars
    doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
    doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
    doc.Add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
    doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
    // add numeric fields, to test if flex preserves encoding
    doc.Add(new IntField("trieInt", id, Field.Store.NO));
    doc.Add(new LongField("trieLong", (long)id, Field.Store.NO));
    // add docvalues fields
    doc.Add(new NumericDocValuesField("dvByte", (sbyte)id));
    // big-endian byte decomposition of id, shared by all binary/sorted dv fields
    sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
    BytesRef @ref = new BytesRef(bytes);
    doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesDerefVar", @ref));
    doc.Add(new SortedDocValuesField("dvBytesSortedFixed", @ref));
    doc.Add(new SortedDocValuesField("dvBytesSortedVar", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesStraightVar", @ref));
    doc.Add(new DoubleDocValuesField("dvDouble", (double)id));
    doc.Add(new FloatDocValuesField("dvFloat", (float)id));
    doc.Add(new NumericDocValuesField("dvInt", id));
    doc.Add(new NumericDocValuesField("dvLong", id));
    doc.Add(new NumericDocValuesField("dvPacked", id));
    doc.Add(new NumericDocValuesField("dvShort", (short)id));
    // a field with both offsets and term vectors for a cross-check
    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
    customType3.StoreTermVectors = true;
    customType3.StoreTermVectorPositions = true;
    customType3.StoreTermVectorOffsets = true;
    customType3.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    doc.Add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
    // a field that omits only positions
    FieldType customType4 = new FieldType(TextField.TYPE_STORED);
    customType4.StoreTermVectors = true;
    customType4.StoreTermVectorPositions = false;
    customType4.StoreTermVectorOffsets = true;
    customType4.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS;
    doc.Add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
    // TODO:
    // index different norms types via similarity (we use a random one currently?!)
    // remove any analyzer randomness, explicitly add payloads for certain fields.
    writer.AddDocument(doc);
}
/// <summary>
/// Opens the given back-compat index, verifies its stored fields, term vectors,
/// doc-values (for 4.0+ indexes) and search results against the contents that
/// <c>AddDoc</c> originally wrote.
/// </summary>
public virtual void SearchIndex(Directory dir, string oldName)
{
    //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
    //Query query = parser.parse("handle:1");

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    TestUtil.CheckIndex(dir);

    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;

    Bits liveDocs = MultiFields.GetLiveDocs(reader);

    for (int i = 0; i < 35; i++)
    {
        if (liveDocs.Get(i))
        {
            Document d = reader.Document(i);
            IList<IndexableField> fields = d.Fields;
            bool isProxDoc = d.GetField("content3") == null;
            if (isProxDoc)
            {
                int numFields = is40Index ? 7 : 5;
                Assert.AreEqual(numFields, fields.Count);
                IndexableField f = d.GetField("id");
                Assert.AreEqual("" + i, f.StringValue);

                f = d.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                f = d.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.StringValue);
            }

            Fields tfvFields = reader.GetTermVectors(i);
            Assert.IsNotNull(tfvFields, "i=" + i);
            Terms tfv = tfvFields.Terms("utf8");
            Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");

        for (int i = 0; i < 35; i++)
        {
            int id = Convert.ToInt32(reader.Document(i).Get("id"));
            Assert.AreEqual(id, dvByte.Get(i));

            // expected bytes: the same big-endian decomposition AddDoc wrote
            sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
            BytesRef expectedRef = new BytesRef(bytes);
            BytesRef scratch = new BytesRef();

            dvBytesDerefFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);

            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
            Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
            Assert.AreEqual(id, dvInt.Get(i));
            Assert.AreEqual(id, dvLong.Get(i));
            Assert.AreEqual(id, dvPacked.Get(i));
            Assert.AreEqual(id, dvShort.Get(i));
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #21 since its norm was increased:
    Document d_ = searcher.IndexReader.Document(hits[0].Doc);
    // LUCENENET: fixed the argument order — NUnit's signature is
    // (expected, actual, message); previously the message string was passed
    // in the expected slot, so the assertion compared the wrong values.
    Assert.AreEqual("21", d_.Get("id"), "didn't get the right document first");

    DoTestHits(hits, 34, searcher.IndexReader);

    if (is40Index)
    {
        hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);

        hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
    }

    hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);

    reader.Dispose();
}
/// <summary>
/// Pairs a file descriptor with the most recently read line from that file.
/// </summary>
internal FileAndTop(int fd, byte[] firstLine)
{
    Fd = fd;
    Current = new BytesRef(firstLine);
}
/// <summary>
/// Checks <c>SpanPositionRangeQuery</c> bounds over a nested SpanNear query,
/// then verifies <c>SpanNearPayloadCheckQuery</c> matches when all four
/// expected payloads are supplied.
/// </summary>
public virtual void TestComplexSpanChecks()
{
    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
    SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
    //should be one position in between
    SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
    SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));

    // "one thousand" and "hundred three" must each be adjacent and ordered
    SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[] { one, thous }, 0, true);
    SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[] { hundred, three }, 0, true);
    // the two pairs may be separated by at most one position
    SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[] { oneThous, hundredThree }, 1, true);
    SpanQuery query;
    //this one's too small
    query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
    CheckHits(query, new int[] { });
    //this one's just right
    query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
    CheckHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });

    // payloads recorded at positions 0, 1, 3 and 4 of the matching span
    var payloads = new List<byte[]>();
    BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
    BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
    BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8));
    BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8));
    payloads.Add(pay.Bytes);
    payloads.Add(pay2.Bytes);
    payloads.Add(pay3.Bytes);
    payloads.Add(pay4.Bytes);

    query = new SpanNearPayloadCheckQuery(oneThousHunThree, payloads);
    CheckHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
}
/// <summary>
/// Positions the underlying <c>TermsEnum</c> on ordinal <paramref name="id"/> and
/// exposes that term's backing bytes through <paramref name="result"/> (no copy is made).
/// </summary>
public override void Get(long id, BytesRef result)
{
    // LUCENENET: removed a try/catch that only rethrew (leaving an unused
    // 'e' variable); in .NET there are no checked exceptions, so the wrapper
    // added nothing over letting any exception propagate naturally.
    TermsEnum_Renamed.SeekExact(id);
    BytesRef term = TermsEnum_Renamed.Term();
    result.Bytes = term.Bytes;
    result.Offset = term.Offset;
    result.Length = term.Length;
}
/// <summary>
/// Adds a new <see cref="BytesRef"/>.
/// </summary>
/// <param name="bytes">
/// The bytes to hash </param>
/// <returns> The id the given bytes are hashed if there was no mapping for the
/// given bytes, otherwise <c>(-(id)-1)</c>. This guarantees
/// that the return value will always be &gt;= 0 if the given bytes
/// haven't been hashed before.
/// </returns>
/// <exception cref="MaxBytesLengthExceededException">
/// if the given bytes are longer than
/// <see cref="ByteBlockPool.BYTE_BLOCK_SIZE"/> - 2 </exception>
public int Add(BytesRef bytes)
{
    Debug.Assert(bytesStart != null, "Bytesstart is null - not initialized");
    int length = bytes.Length;
    // final position
    int hashPos = FindHash(bytes);
    int e = ids[hashPos];

    if (e == -1)
    {
        // new entry: 2 extra bytes reserve room for the vInt length prefix
        int len2 = 2 + bytes.Length;
        if (len2 + pool.ByteUpto > ByteBlockPool.BYTE_BLOCK_SIZE)
        {
            if (len2 > ByteBlockPool.BYTE_BLOCK_SIZE)
            {
                throw new MaxBytesLengthExceededException("bytes can be at most " + (ByteBlockPool.BYTE_BLOCK_SIZE - 2) + " in length; got " + bytes.Length);
            }
            // entry would straddle a block boundary; start a fresh block
            pool.NextBuffer();
        }
        var buffer = pool.Buffer;
        int bufferUpto = pool.ByteUpto;
        if (count >= bytesStart.Length)
        {
            bytesStart = bytesStartArray.Grow();
            Debug.Assert(count < bytesStart.Length + 1, "count: " + count + " len: " + bytesStart.Length);
        }
        e = count++;

        // remember the pool-global start offset of this entry
        bytesStart[e] = bufferUpto + pool.ByteOffset;

        // We first encode the length, followed by the
        // bytes. Length is encoded as vInt, but will consume
        // 1 or 2 bytes at most (we reject too-long terms,
        // above).
        if (length < 128)
        {
            // 1 byte to store length
            buffer[bufferUpto] = (byte)length;
            pool.ByteUpto += length + 1;
            Debug.Assert(length >= 0, "Length must be positive: " + length);
            System.Buffer.BlockCopy(bytes.Bytes, bytes.Offset, buffer, bufferUpto + 1, length);
        }
        else
        {
            // 2 byte to store length
            buffer[bufferUpto] = (byte)(0x80 | (length & 0x7f));
            buffer[bufferUpto + 1] = (byte)((length >> 7) & 0xff);
            pool.ByteUpto += length + 2;
            System.Buffer.BlockCopy(bytes.Bytes, bytes.Offset, buffer, bufferUpto + 2, length);
        }
        Debug.Assert(ids[hashPos] == -1);
        ids[hashPos] = e;

        // grow once the table is half full to keep probe chains short
        if (count == hashHalfSize)
        {
            Rehash(2 * hashSize, true);
        }
        return(e);
    }
    // already present: encode the existing id as -(id)-1
    return(-(e + 1));
}
/// <summary>
/// Returns <c>true</c> when the pooled bytes stored under <paramref name="id"/>
/// equal <paramref name="b"/>.
/// </summary>
private bool Equals(int id, BytesRef b)
{
    pool.SetBytesRef(scratch1, bytesStart[id]);
    return scratch1.BytesEquals(b);
}
/// <summary>
/// Returns <c>true</c> if the <paramref name="ref"/> ends with the given
/// <paramref name="suffix"/>; otherwise <c>false</c>.
/// </summary>
/// <param name="ref"> The <see cref="BytesRef"/> to test. </param>
/// <param name="suffix"> The expected suffix </param>
/// <returns> <c>true</c> if <paramref name="ref"/> ends with <paramref name="suffix"/>;
/// otherwise <c>false</c>. </returns>
public static bool EndsWith(BytesRef @ref, BytesRef suffix) =>
    // a suffix matches when it equals the slice starting suffix.Length bytes from the end
    SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
/// <summary>
/// Returns <c>true</c> if the <paramref name="ref"/> starts with the given
/// <paramref name="prefix"/>; otherwise <c>false</c>.
/// </summary>
/// <param name="ref"> The <see cref="BytesRef"/> to test. </param>
/// <param name="prefix"> The expected prefix </param>
/// <returns> <c>true</c> if <paramref name="ref"/> starts with <paramref name="prefix"/>;
/// otherwise <c>false</c>. </returns>
public static bool StartsWith(BytesRef @ref, BytesRef prefix) => SliceEquals(@ref, prefix, 0);
/// <summary>
/// Returns <c>true</c> if the <paramref name="ref"/> ends with the given
/// <paramref name="suffix"/>; otherwise <c>false</c>.
/// </summary>
/// <param name="ref"> The <see cref="BytesRef"/> to test. </param>
/// <param name="suffix"> The expected suffix </param>
/// <returns> <c>true</c> if <paramref name="ref"/> ends with <paramref name="suffix"/>;
/// otherwise <c>false</c>. </returns>
public static bool EndsWith(BytesRef @ref, BytesRef suffix) // LUCENENET TODO: API - convert to extension method
{
    // a suffix matches when it equals the slice starting suffix.Length bytes from the end
    return SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
}
/// <summary>
/// Returns <c>true</c> if the <paramref name="ref"/> starts with the given
/// <paramref name="prefix"/>; otherwise <c>false</c>.
/// </summary>
/// <param name="ref"> The <see cref="BytesRef"/> to test. </param>
/// <param name="prefix"> The expected prefix </param>
/// <returns> <c>true</c> if <paramref name="ref"/> starts with <paramref name="prefix"/>;
/// otherwise <c>false</c>. </returns>
public static bool StartsWith(BytesRef @ref, BytesRef prefix) // LUCENENET TODO: API - convert to extension method
{
    return SliceEquals(@ref, prefix, 0);
}
/// <summary>
/// Advances the wrapped stream one token, attaching a payload slice from
/// <c>Data</c> unless the data is exhausted or the term text ends with
/// "NO PAYLOAD".
/// </summary>
public override sealed bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    // Some values of the same field are to have payloads and others not
    bool dataRemaining = Offset + Length <= Data.Length;
    if (dataRemaining && !TermAttribute.ToString().EndsWith("NO PAYLOAD"))
    {
        PayloadAtt.Payload = new BytesRef(Data, Offset, Length);
        Offset += Length;
    }
    else
    {
        PayloadAtt.Payload = null;
    }

    return true;
}
/// <summary>
/// Reads the fixed-width binary value for <paramref name="id"/> into a freshly
/// allocated buffer exposed through <paramref name="result"/>.
/// </summary>
public override void Get(long id, BytesRef result)
{
    long address = Bytes.Offset + id * Bytes.MaxLength;
    // LUCENENET: removed a catch (Exception) { throw; } wrapper that had no
    // effect — it neither handled nor annotated the exception.
    Data.Seek(address);
    // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
    // assume "they" own the bytes after calling this!
    var buffer = new byte[Bytes.MaxLength];
    Data.ReadBytes(buffer, 0, buffer.Length);
    result.Bytes = buffer;
    result.Offset = 0;
    result.Length = buffer.Length;
}
/// <summary>
/// Retrieves the term for ordinal <paramref name="ord"/> from the binary source
/// into <paramref name="result"/>.
/// </summary>
public override void LookupOrd(int ord, BytesRef result) => Binary.Get(ord, result);
/// <summary>
/// Returns the id of the given <see cref="BytesRef"/>.
/// </summary>
/// <param name="bytes"> The bytes to look for </param>
/// <returns> The id of the given bytes, or <c>-1</c> if there is no mapping for
/// the given bytes. </returns>
public int Find(BytesRef bytes) => ids[FindHash(bytes)];
/// <summary>
/// Initializes the enum unpositioned (ord -1), pre-sizing the term buffer to the
/// maximum stored term length when that length is known.
/// </summary>
public TermsEnumAnonymousInnerClassHelper(CompressedBinaryDocValues outerInstance, IndexInput input)
{
    this.OuterInstance = outerInstance;
    this.Input = input;
    currentOrd = -1;
    // MaxLength may be negative (unknown); never pre-size with a negative capacity
    int maxLength = outerInstance.Bytes.MaxLength;
    termBuffer = new BytesRef(maxLength < 0 ? 0 : maxLength);
    term = new BytesRef();
}
/// <summary>
/// Returns <c>true</c> if <paramref name="ref"/> starts with the given
/// <paramref name="prefix"/>; otherwise <c>false</c>.
/// </summary>
public static bool StartsWith(this BytesRef @ref, BytesRef prefix) // LUCENENET specific - converted to extension method
    => SliceEquals(@ref, prefix, 0);
/// <summary>
/// Creates an independent copy of <paramref name="other"/>, deep-copying its term bytes.
/// </summary>
internal static Term DeepCopyOf(Term other) => new Term(other.Field, BytesRef.DeepCopyOf(other.Bytes));
/// <summary>
/// Returns <c>true</c> if <paramref name="ref"/> ends with the given
/// <paramref name="suffix"/>; otherwise <c>false</c>.
/// </summary>
public static bool EndsWith(this BytesRef @ref, BytesRef suffix) // LUCENENET specific - converted to extension method
    => SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
/// <summary>
/// Returns prefix coded bits after reducing the precision by <paramref name="shift"/> bits.
/// This method is used by <see cref="NumericTokenStream"/>.
/// After encoding, <c>bytes.Offset</c> will always be 0. </summary>
/// <param name="val"> The numeric value </param>
/// <param name="shift"> How many bits to strip from the right </param>
/// <param name="bytes"> Will contain the encoded value </param>
public static void IntToPrefixCoded(int val, int shift, BytesRef bytes)
{
    IntToPrefixCodedBytes(val, shift, bytes);
}
/// <summary>
/// Computes the MurmurHash3 x86 32-bit hash over the valid region of
/// <paramref name="bytes"/>.
/// </summary>
public static int Murmurhash3_x86_32(BytesRef bytes, int seed)
    => Murmurhash3_x86_32(bytes.Bytes, bytes.Offset, bytes.Length, seed);
/// <summary>
/// For each back-compat index: enumerates the single "content" term, then
/// verifies that SeekCeil behaves correctly at, past, and before that term,
/// and that Next() never leaks into a different field.
/// </summary>
public virtual void TestNextIntoWrongField()
{
    foreach (string name in OldNames)
    {
        Directory dir = OldIndexDirs[name];
        IndexReader r = DirectoryReader.Open(dir);
        TermsEnum terms = MultiFields.GetFields(r).Terms("content").Iterator(null);
        BytesRef t = terms.Next();
        Assert.IsNotNull(t);

        // content field only has term aaa:
        Assert.AreEqual("aaa", t.Utf8ToString());
        Assert.IsNull(terms.Next());

        BytesRef aaaTerm = new BytesRef("aaa");

        // should be found exactly
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
        Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
        Assert.IsNull(terms.Next());

        // should hit end of field
        Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("bbb")));
        Assert.IsNull(terms.Next());

        // should seek to aaa
        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, terms.SeekCeil(new BytesRef("a")));
        Assert.IsTrue(terms.Term().BytesEquals(aaaTerm));
        Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
        Assert.IsNull(terms.Next());

        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
        Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
        Assert.IsNull(terms.Next());

        r.Dispose();
    }
}
/// <summary>
/// Streams random bytes through PagedBytes' DataOutput, reads them back via
/// GetDataInput, and cross-checks 100 random slices through Reader.FillSlice.
/// </summary>
public virtual void TestDataInputOutput2()
{
    Random random = Random;
    for (int iter = 0; iter < 5 * RandomMultiplier; iter++)
    {
        int blockBits = TestUtil.NextInt32(random, 1, 20);
        int blockSize = 1 << blockBits;
        PagedBytes p = new PagedBytes(blockBits);
        DataOutput @out = p.GetDataOutput();
        // LUCENENET: use a minimum of 2 bytes (matching TestDataInputOutput) so
        // that random.Next(numBytes - 1) below can never receive a negative
        // bound — Random.Next(10000000) could previously return 0 and make
        // Next(-1) throw ArgumentOutOfRangeException.
        int numBytes = TestUtil.NextInt32(LuceneTestCase.Random, 2, 10000000);
        byte[] answer = new byte[numBytes];
        LuceneTestCase.Random.NextBytes(answer);
        int written = 0;
        // write with a random mix of single-byte and chunked writes
        while (written < numBytes)
        {
            if (LuceneTestCase.Random.Next(10) == 7)
            {
                @out.WriteByte(answer[written++]);
            }
            else
            {
                int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - written);
                @out.WriteBytes(answer, written, chunk);
                written += chunk;
            }
        }

        PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

        DataInput @in = p.GetDataInput();

        byte[] verify = new byte[numBytes];
        int read = 0;
        // read back with the same random mix of single-byte and chunked reads
        while (read < numBytes)
        {
            if (LuceneTestCase.Random.Next(10) == 7)
            {
                verify[read++] = @in.ReadByte();
            }
            else
            {
                int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - read);
                @in.ReadBytes(verify, read, chunk);
                read += chunk;
            }
        }
        Assert.IsTrue(Arrays.Equals(answer, verify));

        // spot-check 100 random slices against the source array
        BytesRef slice = new BytesRef();
        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            int pos = random.Next(numBytes - 1);
            int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
            reader.FillSlice(slice, pos, len);
            for (int byteUpto = 0; byteUpto < len; byteUpto++)
            {
                Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
            }
        }
    }
}
/// <summary>
/// Round-trips randomly generated 20-char UTF-16 buffers (which may contain
/// unpaired surrogates) through UTF16toUTF8/UTF8toUTF16 and compares against
/// the framework encoder when the input is legal.
/// </summary>
public virtual void TestRandomUnicodeStrings()
{
    char[] buffer = new char[20];
    char[] expected = new char[20];

    BytesRef utf8 = new BytesRef(20);
    CharsRef utf16 = new CharsRef(20);

    int num = AtLeast(100000);
    for (int iter = 0; iter < num; iter++)
    {
        // hasIllegal is true when the buffer contains unpaired surrogates
        bool hasIllegal = FillUnicode(buffer, expected, 0, 20);

        UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8);
        if (!hasIllegal)
        {
            // only legal sequences can be compared against the framework encoder
            var b = (new string(buffer, 0, 20)).GetBytes(IOUtils.CHARSET_UTF_8);
            Assert.AreEqual(b.Length, utf8.Length);
            for (int i = 0; i < b.Length; i++)
            {
                Assert.AreEqual(b[i], utf8.Bytes[i]);
            }
        }

        UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
        Assert.AreEqual(utf16.Length, 20);
        for (int i = 0; i < 20; i++)
        {
            Assert.AreEqual(expected[i], utf16.Chars[i]);
        }
    }
}
/// <summary>
/// Writes a random byte stream to an on-disk file, copies it into PagedBytes,
/// then verifies both sequential reads (via a cloned DataInput) and 100 random
/// slice reads against the original bytes.
/// </summary>
public virtual void TestDataInputOutput()
{
    Random random = Random;
    for (int iter = 0; iter < 5 * RandomMultiplier; iter++)
    {
        BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));
        if (dir is MockDirectoryWrapper)
        {
            // throttling only slows this test down without adding coverage
            ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
        }
        int blockBits = TestUtil.NextInt32(random, 1, 20);
        int blockSize = 1 << blockBits;
        PagedBytes p = new PagedBytes(blockBits);
        IndexOutput @out = dir.CreateOutput("foo", IOContext.DEFAULT);
        int numBytes = TestUtil.NextInt32(LuceneTestCase.Random, 2, 10000000);

        byte[] answer = new byte[numBytes];
        LuceneTestCase.Random.NextBytes(answer);
        int written = 0;
        // write with a random mix of single-byte and chunked writes
        while (written < numBytes)
        {
            if (LuceneTestCase.Random.Next(10) == 7)
            {
                @out.WriteByte(answer[written++]);
            }
            else
            {
                int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - written);
                @out.WriteBytes(answer, written, chunk);
                written += chunk;
            }
        }

        @out.Dispose();
        IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
        DataInput @in = (DataInput)input.Clone();

        p.Copy(input, input.Length);
        PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

        byte[] verify = new byte[numBytes];
        int read = 0;
        // read back with the same random mix of single-byte and chunked reads
        while (read < numBytes)
        {
            if (LuceneTestCase.Random.Next(10) == 7)
            {
                verify[read++] = @in.ReadByte();
            }
            else
            {
                int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - read);
                @in.ReadBytes(verify, read, chunk);
                read += chunk;
            }
        }
        Assert.IsTrue(Arrays.Equals(answer, verify));

        // spot-check 100 random slices against the source array
        BytesRef slice = new BytesRef();
        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            int pos = random.Next(numBytes - 1);
            int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
            reader.FillSlice(slice, pos, len);
            for (int byteUpto = 0; byteUpto < len; byteUpto++)
            {
                Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
            }
        }
        input.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Verifies that a second hash sharing the same pool, populated via
/// AddByPoolOffset, assigns the same ids as the primary hash and returns the
/// same bytes for duplicate adds.
/// </summary>
public virtual void TestAddByPoolOffset()
{
    BytesRef @ref = new BytesRef();
    BytesRef scratch = new BytesRef();
    // offsetHash shares 'pool' with 'hash', so pool offsets are valid in both
    BytesRefHash offsetHash = NewHash(pool);
    int num = AtLeast(2);
    for (int j = 0; j < num; j++)
    {
        ISet<string> strings = new JCG.HashSet<string>();
        int uniqueCount = 0;
        for (int i = 0; i < 797; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random, 1000);
            } while (str.Length == 0);
            @ref.CopyChars(str);
            int count = hash.Count;
            int key = hash.Add(@ref);

            if (key >= 0)
            {
                // new entry: both hashes must assign the same sequential id
                Assert.IsTrue(strings.Add(str));
                Assert.AreEqual(uniqueCount, key);
                Assert.AreEqual(hash.Count, count + 1);
                int offsetKey = offsetHash.AddByPoolOffset(hash.ByteStart(key));
                Assert.AreEqual(uniqueCount, offsetKey);
                Assert.AreEqual(offsetHash.Count, count + 1);
                uniqueCount++;
            }
            else
            {
                // duplicate: Add encodes the existing id as -(id)-1 in both hashes
                Assert.IsFalse(strings.Add(str));
                Assert.IsTrue((-key) - 1 < count);
                Assert.AreEqual(str, hash.Get((-key) - 1, scratch).Utf8ToString());
                Assert.AreEqual(count, hash.Count);
                int offsetKey = offsetHash.AddByPoolOffset(hash.ByteStart((-key) - 1));
                Assert.IsTrue((-offsetKey) - 1 < count);
                Assert.AreEqual(str, hash.Get((-offsetKey) - 1, scratch).Utf8ToString());
                Assert.AreEqual(count, hash.Count);
            }
        }

        AssertAllIn(strings, hash);
        // every string must also resolve through the offset-based hash
        foreach (string @string in strings)
        {
            @ref.CopyChars(@string);
            int key = hash.Add(@ref);
            BytesRef bytesRef = offsetHash.Get((-key) - 1, scratch);
            Assert.AreEqual(@ref, bytesRef);
        }

        hash.Clear();
        Assert.AreEqual(0, hash.Count);
        offsetHash.Clear();
        Assert.AreEqual(0, offsetHash.Count);
        hash.Reinit(); // init for the next round
        offsetHash.Reinit();
    }
}
/// <summary>
/// Captures the state needed to iterate the array in sorted order, starting at
/// the first slot.
/// </summary>
public BytesRefIteratorAnonymousInnerClassHelper(BytesRefArray outerInstance, IComparer<BytesRef> comp, BytesRef spare, int size, int[] indices)
{
    OuterInstance = outerInstance;
    Comp = comp;
    Spare = spare;
    Size = size;
    Indices = indices;
    pos = 0; // iteration begins at the first slot
}
/// <summary>
/// Retrieves the value stored at <paramref name="id"/> into <paramref name="Result"/>.
/// </summary>
// NOTE(review): the parameter name 'Result' breaks the camelCase convention —
// the overrides call it 'result'. Renaming here could affect callers that use
// named arguments, so it is flagged rather than changed.
public abstract void Get(long id, BytesRef Result);
/// <summary>
/// Decompresses <paramref name="compressed"/> with the given decompressor and
/// returns the restored bytes as a new array of <paramref name="originalLength"/> bytes.
/// </summary>
internal static byte[] Decompress(Decompressor decompressor, byte[] compressed, int originalLength)
{
    var restored = new BytesRef();
    var input = new ByteArrayDataInput(compressed);
    decompressor.Decompress(input, originalLength, 0, originalLength, restored);
    return Arrays.CopyOfRange(restored.Bytes, restored.Offset, restored.Offset + restored.Length);
}
/// <summary>
/// Reads the variable-width binary value for <paramref name="id"/> into a freshly
/// allocated buffer exposed through <paramref name="result"/>.
/// </summary>
public override void Get(long id, BytesRef result)
{
    // start/end addresses bracket this entry; entry 0 starts at the data offset
    long startAddress = Bytes.Offset + (id == 0 ? 0 : Addresses.Get(id - 1));
    long endAddress = Bytes.Offset + Addresses.Get(id);
    int length = (int)(endAddress - startAddress);
    // LUCENENET: removed a catch (Exception) { throw; } wrapper that had no
    // effect — it neither handled nor annotated the exception.
    Data.Seek(startAddress);
    // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
    // assume "they" own the bytes after calling this!
    var buffer = new byte[length];
    Data.ReadBytes(buffer, 0, buffer.Length);
    result.Bytes = buffer;
    result.Offset = 0;
    result.Length = length;
}
/// <summary>
/// Decompresses the requested window of <paramref name="compressed"/> using a new
/// decompressor from the current mode and returns the restored bytes.
/// </summary>
internal virtual byte[] Decompress(byte[] compressed, int originalLength, int offset, int length)
{
    Decompressor decompressor = Mode.NewDecompressor();
    var restored = new BytesRef();
    decompressor.Decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, restored);
    return Arrays.CopyOfRange(restored.Bytes, restored.Offset, restored.Offset + restored.Length);
}
/// <summary>
/// Looks up the ordinal for <paramref name="key"/>, using the compressed
/// storage's dedicated lookup path when available.
/// </summary>
public override int LookupTerm(BytesRef key)
{
    if (Binary is CompressedBinaryDocValues compressed)
    {
        return (int)compressed.LookupTerm(key);
    }
    // uncompressed storage: fall back to the base implementation
    return base.LookupTerm(key);
}
/// <summary>
/// Seeks to the specified term, if it exists, or to the
/// next (ceiling) term. Returns <see cref="SeekStatus"/> to
/// indicate whether the exact term was found, a different
/// term was found, or EOF was hit. The target term may
/// be before or after the current term. If this returns
/// <see cref="SeekStatus.END"/>, the enum is unpositioned.
/// </summary>
public abstract SeekStatus SeekCeil(BytesRef text);
/// <summary>
/// Seeks the underlying <c>TermsEnum</c> to <paramref name="key"/>, returning the
/// ordinal when found, or <c>-(insertionPoint)-1</c> when not found
/// (<c>-NumValues-1</c> past the last term).
/// </summary>
internal virtual long LookupTerm(BytesRef key)
{
    // LUCENENET: removed a catch (Exception) { throw; } wrapper that had no
    // effect — it neither handled nor annotated the exception.
    TermsEnum.SeekStatus status = TermsEnum_Renamed.SeekCeil(key);
    if (status == TermsEnum.SeekStatus.END)
    {
        return -NumValues - 1;
    }
    else if (status == TermsEnum.SeekStatus.FOUND)
    {
        return TermsEnum_Renamed.Ord();
    }
    else
    {
        return -TermsEnum_Renamed.Ord() - 1;
    }
}
/// <summary>
/// One-time index setup: builds NUM_DOCS documents with descending numeric
/// fields, an even-doc-only sparse field, a random unicode string field and a
/// random unicode multi-valued field, then wraps the reader in a slow
/// composite reader.
/// </summary>
public static void BeforeClass()
{
    NUM_DOCS = AtLeast(500);
    NUM_ORDS = AtLeast(2);
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    long theLong = long.MaxValue;
    double theDouble = double.MaxValue;
    sbyte theByte = sbyte.MaxValue;
    short theShort = short.MaxValue;
    int theInt = int.MaxValue;
    float theFloat = float.MaxValue;
    UnicodeStrings = new string[NUM_DOCS];
    // LUCENENET: allocate a jagged array (rows created per document below) —
    // the rectangular 'new BytesRef[NUM_DOCS, NUM_ORDS]' used previously is
    // inconsistent with the per-row 'Array.Sort(MultiValued[i])' call later in
    // this method, which requires a single-dimensional row.
    MultiValued = new BytesRef[NUM_DOCS][];
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp");
    }
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        // each numeric field counts down from its type's maximum value
        doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
        doc.Add(NewStringField("theDouble", Convert.ToString(theDouble--), Field.Store.NO));
        doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
        doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
        doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
        doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
        if (i % 2 == 0)
        {
            doc.Add(NewStringField("sparse", Convert.ToString(i), Field.Store.NO));
        }

        if (i % 2 == 0)
        {
            doc.Add(new IntField("numInt", i, Field.Store.NO));
        }

        // sometimes skip the field:
        if (Random().Next(40) != 17)
        {
            UnicodeStrings[i] = GenerateString(i);
            doc.Add(NewStringField("theRandomUnicodeString", UnicodeStrings[i], Field.Store.YES));
        }

        // sometimes skip the field:
        if (Random().Next(10) != 8)
        {
            MultiValued[i] = new BytesRef[NUM_ORDS];
            for (int j = 0; j < NUM_ORDS; j++)
            {
                string newValue = GenerateString(i);
                MultiValued[i][j] = new BytesRef(newValue);
                doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
            }
            Array.Sort(MultiValued[i]);
        }
        writer.AddDocument(doc);
    }
    IndexReader r = writer.Reader;
    Reader = SlowCompositeReaderWrapper.Wrap(r);
    writer.Dispose();
}
/// <summary>
/// Seeks to <paramref name="text"/> (or the next greater term) by binary-searching
/// the indexed (every Interval'th) terms to find the containing block, then
/// scanning forward within that block.
/// </summary>
public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
{
    // binary-search just the index values to find the block,
    // then scan within the block
    long low = 0;
    long high = OuterInstance.NumIndexValues - 1;

    while (low <= high)
    {
        // LUCENENET: was (int)((uint)(low + high) >> 1), which truncated the
        // 64-bit midpoint to 32 bits; use an unsigned 64-bit shift instead
        // (the equivalent of Java's (low + high) >>> 1 on longs).
        long mid = (long)((ulong)(low + high) >> 1);
        DoSeek(mid * OuterInstance.Interval);
        int cmp = termBuffer.CompareTo(text);

        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            // we got lucky, found an indexed term
            SetTerm();
            return TermsEnum.SeekStatus.FOUND;
        }
    }

    if (OuterInstance.NumIndexValues == 0)
    {
        return TermsEnum.SeekStatus.END;
    }

    // block before insertion point
    long block = low - 1;
    DoSeek(block < 0 ? -1 : block * OuterInstance.Interval);

    // linear scan within the block until we reach or pass the target
    while (DoNext() != null)
    {
        int cmp = termBuffer.CompareTo(text);
        if (cmp == 0)
        {
            SetTerm();
            return TermsEnum.SeekStatus.FOUND;
        }
        else if (cmp > 0)
        {
            SetTerm();
            return TermsEnum.SeekStatus.NOT_FOUND;
        }
    }

    return TermsEnum.SeekStatus.END;
}
/// <summary>
/// Verifies that the FieldCache rejects doc-values fields accessed through the
/// wrong API (throwing <see cref="InvalidOperationException"/>) and serves them
/// correctly through the matching one, for BINARY, SORTED, NUMERIC and
/// SORTED_SET fields.
/// </summary>
public virtual void TestDocValuesIntegration()
{
    AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues());
    Directory dir = NewDirectory();
    // null analyzer: fields are doc-values only, nothing is tokenized
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
    Document doc = new Document();
    doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.Add(new NumericDocValuesField("numeric", 42));
    if (DefaultCodecSupportsSortedSet())
    {
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    }
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);
    BytesRef scratch = new BytesRef();

    // Binary type: can be retrieved via getTerms()
    // LUCENENET: unused 'expected' identifiers removed from the catch clauses
    // below to avoid CS0168 warnings; the empty catches are intentional — the
    // throw IS the behavior under test.
    try
    {
        FieldCache_Fields.DEFAULT.GetInts(ar, "binary", false);
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    BinaryDocValues binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "binary", true);
    binary.Get(0, scratch);
    Assert.AreEqual("binary value", scratch.Utf8ToString());
    try
    {
        FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "binary");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "binary");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        new DocTermOrds(ar, null, "binary");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    Bits bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "binary");
    Assert.IsTrue(bits.Get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    try
    {
        FieldCache_Fields.DEFAULT.GetInts(ar, "sorted", false);
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        new DocTermOrds(ar, null, "sorted");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "sorted", true);
    binary.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());
    SortedDocValues sorted = FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sorted");
    Assert.AreEqual(0, sorted.GetOrd(0));
    Assert.AreEqual(1, sorted.ValueCount);
    sorted.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());
    SortedSetDocValues sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sorted");
    sortedSet.Document = 0;
    Assert.AreEqual(0, sortedSet.NextOrd());
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
    Assert.AreEqual(1, sortedSet.ValueCount);
    bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sorted");
    Assert.IsTrue(bits.Get(0));

    // Numeric type: can be retrieved via getInts() and so on
    Ints numeric = FieldCache_Fields.DEFAULT.GetInts(ar, "numeric", false);
    Assert.AreEqual(42, numeric.Get(0));
    try
    {
        FieldCache_Fields.DEFAULT.GetTerms(ar, "numeric", true);
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "numeric");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "numeric");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    try
    {
        new DocTermOrds(ar, null, "numeric");
        Assert.Fail();
    }
    catch (InvalidOperationException) { /* expected */ }
    bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "numeric");
    Assert.IsTrue(bits.Get(0));

    // SortedSet type: can be retrieved via getDocTermOrds()
    if (DefaultCodecSupportsSortedSet())
    {
        try
        {
            FieldCache_Fields.DEFAULT.GetInts(ar, "sortedset", false);
            Assert.Fail();
        }
        catch (InvalidOperationException) { /* expected */ }
        try
        {
            FieldCache_Fields.DEFAULT.GetTerms(ar, "sortedset", true);
            Assert.Fail();
        }
        catch (InvalidOperationException) { /* expected */ }
        try
        {
            FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sortedset");
            Assert.Fail();
        }
        catch (InvalidOperationException) { /* expected */ }
        try
        {
            new DocTermOrds(ar, null, "sortedset");
            Assert.Fail();
        }
        catch (InvalidOperationException) { /* expected */ }
        sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sortedset");
        sortedSet.Document = 0;
        Assert.AreEqual(0, sortedSet.NextOrd());
        Assert.AreEqual(1, sortedSet.NextOrd());
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
        Assert.AreEqual(2, sortedSet.ValueCount);
        bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sortedset");
        Assert.IsTrue(bits.Get(0));
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies SpanPayloadCheckQuery / SpanNearPayloadCheckQuery: a document only
/// matches when the span query matches AND the payloads at the matching
/// positions are byte-equal to the expected list, in order.
/// </summary>
public virtual void TestSpanPayloadCheck()
{
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
    // Payloads in the test index encode the term position as "pos: <n>"
    // (presumably written by the test's payload analyzer — see index setup).
    BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.CHARSET_UTF_8));
    SpanQuery query = new SpanPayloadCheckQuery(term1, new List<byte[]>() { pay.Bytes });
    // Expected: every doc where "five" carries payload "pos: 5".
    CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995 });
    Assert.IsTrue(Searcher.Explain(query, 1125).Value > 0.0f);

    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
    SpanNearQuery snq;
    SpanQuery[] clauses;
    IList<byte[]> list;
    BytesRef pay2;

    // Two adjacent terms ("five hundred") with payloads "pos: 0", "pos: 1".
    clauses = new SpanQuery[2];
    clauses[0] = term1;
    clauses[1] = term2;
    snq = new SpanNearQuery(clauses, 0, true);
    pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
    pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
    list = new List<byte[]>();
    list.Add(pay.Bytes);
    list.Add(pay2.Bytes);
    query = new SpanNearPayloadCheckQuery(snq, list);
    CheckHits(query, new int[] { 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599 });

    // Three adjacent terms ("five hundred five") with payloads for positions
    // 0, 1, 2 — only one document should satisfy all three.
    clauses = new SpanQuery[3];
    clauses[0] = term1;
    clauses[1] = term2;
    clauses[2] = new SpanTermQuery(new Term("field", "five"));
    snq = new SpanNearQuery(clauses, 0, true);
    pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
    pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
    BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.CHARSET_UTF_8));
    list = new List<byte[]>();
    list.Add(pay.Bytes);
    list.Add(pay2.Bytes);
    list.Add(pay3.Bytes);
    query = new SpanNearPayloadCheckQuery(snq, list);
    CheckHits(query, new int[] { 505 });
}
/// <summary>
/// Override this method if you prefer to receive the range bounds already in
/// prefix-coded form; classical inclusive range queries can be built from them
/// directly. The base implementation is deliberately unsupported — subclasses
/// opt in by overriding.
/// </summary>
public virtual void AddRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded)
{
    throw new System.NotSupportedException();
}
/// <summary>
/// Attempts to seek to the exact term, returning true if the term is found.
/// When this returns false the enum is left unpositioned. Codecs may override
/// this with an implementation substantially faster than <seealso cref="#seekCeil"/>.
/// </summary>
public virtual bool SeekExact(BytesRef text)
{
    // Default implementation: a ceiling seek that only counts as success
    // when it landed exactly on the requested term.
    SeekStatus status = SeekCeil(text);
    return status == SeekStatus.FOUND;
}
/// <summary>
/// Accepts only full-precision terms (prefix-coded shift of 0); the first
/// reduced-precision term ends the enumeration.
/// </summary>
protected internal override AcceptStatus Accept(BytesRef term)
{
    if (NumericUtils.GetPrefixCodedIntShift(term) == 0)
    {
        return AcceptStatus.YES;
    }
    return AcceptStatus.END;
}
/// <summary>
/// Exercises the FieldCache for every primitive type plus terms, terms index
/// and doc-term-ords: repeated requests must return the same cached instance,
/// and cached values must match what BeforeClass indexed.
/// </summary>
public virtual void Test()
{
    FieldCache cache = FieldCache_Fields.DEFAULT;

    // doubles: indexed as double.MaxValue - i per doc
    FieldCache_Fields.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random().NextBoolean());
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache_Fields.DEFAULT_DOUBLE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
    }

    // longs
    FieldCache_Fields.Longs longs = cache.GetLongs(Reader, "theLong", Random().NextBoolean());
    Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", FieldCache_Fields.DEFAULT_LONG_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
    }

    // bytes
    FieldCache_Fields.Bytes bytes = cache.GetBytes(Reader, "theByte", Random().NextBoolean());
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache_Fields.DEFAULT_BYTE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
    }

    // shorts
    FieldCache_Fields.Shorts shorts = cache.GetShorts(Reader, "theShort", Random().NextBoolean());
    Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", FieldCache_Fields.DEFAULT_SHORT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
    }

    // ints
    FieldCache_Fields.Ints ints = cache.GetInts(Reader, "theInt", Random().NextBoolean());
    Assert.AreSame(ints, cache.GetInts(Reader, "theInt", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(ints, cache.GetInts(Reader, "theInt", FieldCache_Fields.DEFAULT_INT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
    }

    // floats
    FieldCache_Fields.Floats floats = cache.GetFloats(Reader, "theFloat", Random().NextBoolean());
    Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", FieldCache_Fields.DEFAULT_FLOAT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
    }

    // docsWithField: "theLong" is set on every doc, so a MatchAllBits is expected
    Bits docsWithField = cache.GetDocsWithField(Reader, "theLong");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
    Assert.IsTrue(docsWithField is Bits_MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length(); i++)
    {
        Assert.IsTrue(docsWithField.Get(i));
    }

    // "sparse" is only added to even-numbered docs in BeforeClass
    docsWithField = cache.GetDocsWithField(Reader, "sparse");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
    Assert.IsFalse(docsWithField is Bits_MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length(); i++)
    {
        Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
    Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
    BytesRef br = new BytesRef();
    for (int i = 0; i < NUM_DOCS; i++)
    {
        BytesRef term;
        int ord = termsIndex.GetOrd(i);
        if (ord == -1)
        {
            // ord -1 means the doc has no value for this field
            term = null;
        }
        else
        {
            termsIndex.LookupOrd(ord, br);
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // the enum must yield each term in ord order
    int nTerms = termsIndex.ValueCount;
    TermsEnum tenum = termsIndex.TermsEnum();
    BytesRef val = new BytesRef();
    for (int i = 0; i < nTerms; i++)
    {
        BytesRef val1 = tenum.Next();
        termsIndex.LookupOrd(i, val);
        // System.out.println("i="+i);
        Assert.AreEqual(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        int k = Random().Next(nTerms);
        termsIndex.LookupOrd(k, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term());
    }

    // and again sequentially over every term
    for (int i = 0; i < nTerms; i++)
    {
        termsIndex.LookupOrd(i, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term());
    }

    // test bad field
    termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
    Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
    Bits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        terms.Get(i, br);
        BytesRef term;
        if (!bits.Get(i))
        {
            term = null;
        }
        else
        {
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // test bad field
    terms = cache.GetTerms(Reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.CacheEntries.Length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    Assert.AreEqual(numEntries, cache.CacheEntries.Length);

    for (int i = 0; i < NUM_DOCS; i++)
    {
        termOrds.Document = i;
        // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        IList<BytesRef> values = new List<BytesRef>(new /*Linked*/HashSet<BytesRef>(Arrays.AsList(MultiValued[i])));
        foreach (BytesRef v in values)
        {
            if (v == null)
            {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            long ord = termOrds.NextOrd();
            Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
            BytesRef scratch = new BytesRef();
            termOrds.LookupOrd(ord, scratch);
            Assert.AreEqual(v, scratch);
        }
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
    }

    // test bad field
    termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
    Assert.IsTrue(termOrds.ValueCount == 0);

    FieldCache_Fields.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
}
/// <summary>
/// Expert: Seeks to a specific position using a <seealso cref="TermState"/>
/// previously obtained from <seealso cref="#termState()"/>. Callers should hold
/// onto the <seealso cref="TermState"/> to use this method; low-level
/// implementations may position the TermsEnum without re-seeking the term
/// dictionary.
/// <p>
/// A state should only be passed back to the same <seealso cref="TermsEnum"/>
/// instance that produced it. On a segment level, <seealso cref="TermState"/>
/// instances are compatible only iff the source and target
/// <seealso cref="TermsEnum"/> operate on the same field, and must not be used
/// across segments.
/// <p>
/// NOTE: Using an incompatible <seealso cref="TermState"/> may leave this
/// <seealso cref="TermsEnum"/> in an undefined state.
/// <p>
/// NOTE: A seek by <seealso cref="TermState"/> might not restore the
/// <seealso cref="AttributeSource"/>'s state; maintain
/// <seealso cref="AttributeSource"/> states separately if needed. </summary>
/// <param name="term"> the term the TermState corresponds to </param>
/// <param name="state"> the <seealso cref="TermState"/>
/// </param>
public virtual void SeekExact(BytesRef term, TermState state)
{
    // Default implementation ignores the state and simply re-seeks by term.
    if (SeekExact(term))
    {
        return;
    }
    throw new System.ArgumentException("term=" + term + " does not exist");
}
/// <summary>
/// Stored-only (non-indexed) fields must behave like missing fields: every
/// FieldCache getter returns an empty/default instance, and nothing is added
/// to the cache.
/// </summary>
public virtual void TestNonIndexedFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    // StoredField only — none of these fields are indexed, so the FieldCache
    // has no terms to uninvert for any of them.
    doc.Add(new StoredField("bogusbytes", "bogus"));
    doc.Add(new StoredField("bogusshorts", "bogus"));
    doc.Add(new StoredField("bogusints", "bogus"));
    doc.Add(new StoredField("boguslongs", "bogus"));
    doc.Add(new StoredField("bogusfloats", "bogus"));
    doc.Add(new StoredField("bogusdoubles", "bogus"));
    doc.Add(new StoredField("bogusterms", "bogus"));
    doc.Add(new StoredField("bogustermsindex", "bogus"));
    doc.Add(new StoredField("bogusmultivalued", "bogus"));
    doc.Add(new StoredField("bogusbits", "bogus"));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader ar = GetOnlySegmentReader(ir);

    // Start from a clean cache so the final entry count is meaningful.
    FieldCache cache = FieldCache_Fields.DEFAULT;
    cache.PurgeAllCaches();
    Assert.AreEqual(0, cache.CacheEntries.Length);

    // Each numeric getter returns 0/default for a doc with no indexed value.
    Bytes bytes = cache.GetBytes(ar, "bogusbytes", true);
    Assert.AreEqual(0, bytes.Get(0));

    Shorts shorts = cache.GetShorts(ar, "bogusshorts", true);
    Assert.AreEqual(0, shorts.Get(0));

    Ints ints = cache.GetInts(ar, "bogusints", true);
    Assert.AreEqual(0, ints.Get(0));

    Longs longs = cache.GetLongs(ar, "boguslongs", true);
    Assert.AreEqual(0, longs.Get(0));

    Floats floats = cache.GetFloats(ar, "bogusfloats", true);
    Assert.AreEqual(0, floats.Get(0), 0.0f);

    Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true);
    Assert.AreEqual(0, doubles.Get(0), 0.0D);

    // Terms getters return empty BytesRefs / missing ords.
    BytesRef scratch = new BytesRef();
    BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true);
    binaries.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex");
    Assert.AreEqual(-1, sorted.GetOrd(0));
    sorted.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued");
    sortedSet.Document = 0;
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());

    Bits bits = cache.GetDocsWithField(ar, "bogusbits");
    Assert.IsFalse(bits.Get(0));

    // check that we cached nothing
    Assert.AreEqual(0, cache.CacheEntries.Length);
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// This enum exposes no terms, so any ceiling seek lands past the end.
/// </summary>
public override SeekStatus SeekCeil(BytesRef term) => SeekStatus.END;
/// <summary>
/// Seeking by <seealso cref="TermState"/> is not supported on this enum;
/// reaching this override indicates a caller bug.
/// </summary>
public override void SeekExact(BytesRef term, TermState state)
{
    throw new InvalidOperationException("this method should never be called");
}
/// <summary>
/// Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
/// This method is used by <seealso cref="NumericTokenStream"/>.
/// After encoding, {@code bytes.offset} will always be 0. </summary>
/// <param name="val"> the numeric value </param>
/// <param name="shift"> how many bits to strip from the right </param>
/// <param name="bytes"> will contain the encoded value </param>
public static void LongToPrefixCoded(long val, int shift, BytesRef bytes)
{
    // Thin convenience wrapper: the actual encoding lives in LongToPrefixCodedBytes.
    LongToPrefixCodedBytes(val, shift, bytes);
}