Example #1
0
        /// <summary>
        /// Creates a member-wise copy of this buffer, then deep-copies
        /// <c>bytes</c> so the clone owns its own byte storage.
        /// </summary>
        public object Clone()
        {
            // LUCENENET: MemberwiseClone() doesn't throw in .NET
            var clone = (TermBuffer)base.MemberwiseClone();

            // The shallow copy shares the BytesRef; give the clone its own copy.
            clone.bytes = BytesRef.DeepCopyOf(bytes);
            return clone;
        }
Example #2
0
        /// <summary>
        /// Converts this buffer to a <see cref="Term"/>, caching the result in
        /// <c>term</c>. Returns <c>null</c> when no field has been set yet.
        /// </summary>
        public Term ToTerm()
        {
            if (field is null) // unset
            {
                return null;
            }

            if (term is null)
            {
                // Lazily build and cache the Term with its own copy of the bytes.
                term = new Term(field, BytesRef.DeepCopyOf(bytes));
            }

            return term;
        }
Example #3
0
        /// <summary>
        /// Creates a copy of this <see cref="TermBuffer"/> with its own deep copy
        /// of the underlying <c>bytes</c>.
        /// </summary>
        public object Clone()
        {
            // LUCENENET: unlike Java's Object.clone(), MemberwiseClone() never throws
            // in .NET. The previous try/catch silently swallowed the exception and
            // would then have dereferenced a null clone (NullReferenceException);
            // removing it also drops the #pragma noise for the unused variable.
            TermBuffer clone = (TermBuffer)base.MemberwiseClone();

            // Deep-copy the bytes so the clone does not share mutable state.
            clone.bytes = BytesRef.DeepCopyOf(bytes);
            return clone;
        }
        /// <summary>
        /// Round-trips every valid Unicode code point (skipping the surrogate
        /// range) through <c>UnicodeUtil.UTF16toUTF8</c>/<c>UTF8toUTF16</c> and
        /// cross-checks the result against the BCL's <see cref="Encoding.UTF8"/>.
        /// NOTE(review): the loop condition is <c>ch &lt; 0x0010FFFF</c>, so the
        /// final code point U+10FFFF itself is never tested — confirm intentional.
        /// </summary>
        public virtual void TestAllUnicodeChars()
        {
            BytesRef utf8 = new BytesRef(10);
            CharsRef utf16 = new CharsRef(10);
            char[] chars = new char[2];
            for (int ch = 0; ch < 0x0010FFFF; ch++)
            {
                if (ch == 0xd800)
                // Skip invalid code points (the surrogate range U+D800..U+DFFF)
                {
                    ch = 0xe000;
                }

                int len = 0;
                if (ch <= 0xffff)
                {
                    // BMP code point: a single UTF-16 code unit
                    chars[len++] = (char)ch;
                }
                else
                {
                    // Supplementary code point: encode as a surrogate pair
                    chars[len++] = (char)(((ch - 0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START);
                    chars[len++] = (char)(((ch - 0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START);
                }

                UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8);

                // UTF-16 -> UTF-8 must agree with the framework's encoder
                string s1 = new string(chars, 0, len);
                string s2 = Encoding.UTF8.GetString(utf8.Bytes, utf8.Offset, utf8.Length);
                Assert.AreEqual(s1, s2, "codepoint " + ch);

                // The reverse conversion must restore the original chars
                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
                Assert.AreEqual(s1, new string(utf16.Chars, 0, utf16.Length), "codepoint " + ch);

                // Byte-for-byte comparison against the framework's UTF-8 bytes
                var b = s1.GetBytes(Encoding.UTF8);
                Assert.AreEqual(utf8.Length, b.Length);
                for (int j = 0; j < utf8.Length; j++)
                {
                    Assert.AreEqual(utf8.Bytes[j], b[j]);
                }
            }
        }
 /// <summary>
 /// Looks up the value for <paramref name="docID"/> by delegating to the
 /// <c>Get(long, BytesRef)</c> overload.
 /// </summary>
 public override sealed void Get(int docID, BytesRef result)
     => Get((long)docID, result);
        /// <summary>
        /// Verifies that the terms of field "f" are enumerated in strictly
        /// increasing byte order, that every enumerated term is a member of
        /// <paramref name="allTerms"/>, and that each seen term can afterwards be
        /// found again via <c>SeekCeil</c>.
        /// </summary>
        /// <param name="r">The reader whose "f" field is enumerated.</param>
        /// <param name="allTerms">All terms that were added to the index.</param>
        /// <param name="isTop">When <c>true</c>, the enumeration must be exhaustive
        /// (cover exactly <paramref name="allTerms"/>).</param>
        private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
        {
            TermsEnum terms = MultiFields.GetFields(r).Terms("f").Iterator(null);

            BytesRef last = new BytesRef();

            HashSet<string> seenTerms = new HashSet<string>();

            while (true)
            {
                BytesRef term = terms.Next();
                if (term == null)
                {
                    break;
                }

                // Terms must come back in strictly increasing byte order
                Assert.IsTrue(last.CompareTo(term) < 0);
                last.CopyBytes(term);

                string s = term.Utf8ToString();
                Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
                seenTerms.Add(s);
            }

            // Only at the top level must the enumeration cover the full set
            if (isTop)
            {
                Assert.IsTrue(allTerms.SetEquals(seenTerms));
            }

            // Test seeking:
            IEnumerator<string> it = seenTerms.GetEnumerator();
            while (it.MoveNext())
            {
                BytesRef tr = new BytesRef(it.Current);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
            }
        }
 /// <summary>
 /// Adds one document exercising a wide variety of field types: stored text,
 /// term vectors (with positions/offsets), non-ASCII field names/values,
 /// trie-encoded numerics, and the full range of docvalues fields, all derived
 /// from <paramref name="id"/> so later checks can recompute the expected values.
 /// </summary>
 /// <param name="writer">The writer the document is added to.</param>
 /// <param name="id">Document id; also drives all numeric/binary field values.</param>
 private void AddDoc(IndexWriter writer, int id)
 {
     Document doc = new Document();
     doc.Add(new TextField("content", "aaa", Field.Store.NO));
     doc.Add(new StringField("id", Convert.ToString(id), Field.Store.YES));
     // Stored text with full term vectors (positions + offsets)
     FieldType customType2 = new FieldType(TextField.TYPE_STORED);
     customType2.StoreTermVectors = true;
     customType2.StoreTermVectorPositions = true;
     customType2.StoreTermVectorOffsets = true;
     // Values deliberately include surrogate pairs, NUL, and other non-ASCII chars
     doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
     doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
     doc.Add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
     doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
     // add numeric fields, to test if flex preserves encoding
     doc.Add(new IntField("trieInt", id, Field.Store.NO));
     doc.Add(new LongField("trieLong", (long)id, Field.Store.NO));
     // add docvalues fields
     doc.Add(new NumericDocValuesField("dvByte", (sbyte)id));
     // Big-endian 4-byte encoding of id, reused for all binary/sorted docvalues
     sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
     BytesRef @ref = new BytesRef(bytes);
     doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesDerefVar", @ref));
     doc.Add(new SortedDocValuesField("dvBytesSortedFixed", @ref));
     doc.Add(new SortedDocValuesField("dvBytesSortedVar", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesStraightVar", @ref));
     doc.Add(new DoubleDocValuesField("dvDouble", (double)id));
     doc.Add(new FloatDocValuesField("dvFloat", (float)id));
     doc.Add(new NumericDocValuesField("dvInt", id));
     doc.Add(new NumericDocValuesField("dvLong", id));
     doc.Add(new NumericDocValuesField("dvPacked", id));
     doc.Add(new NumericDocValuesField("dvShort", (short)id));
     // a field with both offsets and term vectors for a cross-check
     FieldType customType3 = new FieldType(TextField.TYPE_STORED);
     customType3.StoreTermVectors = true;
     customType3.StoreTermVectorPositions = true;
     customType3.StoreTermVectorOffsets = true;
     customType3.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     doc.Add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
     // a field that omits only positions
     FieldType customType4 = new FieldType(TextField.TYPE_STORED);
     customType4.StoreTermVectors = true;
     customType4.StoreTermVectorPositions = false;
     customType4.StoreTermVectorOffsets = true;
     customType4.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS;
     doc.Add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
     // TODO:
     //   index different norms types via similarity (we use a random one currently?!)
     //   remove any analyzer randomness, explicitly add payloads for certain fields.
     writer.AddDocument(doc);
 }
        /// <summary>
        /// Opens the back-compat index in <paramref name="dir"/> and verifies stored
        /// fields, term vectors, docvalues (4.0+ indexes only), and search results
        /// against the values written by <c>AddDoc</c>.
        /// </summary>
        /// <param name="dir">Directory holding the index under test.</param>
        /// <param name="oldName">Name of the old index (used in failure messages).</param>
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;

            Bits liveDocs = MultiFields.GetLiveDocs(reader);

            // Check stored fields and term vectors of every live document
            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList<IndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.Terms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    // Recompute the big-endian 4-byte encoding AddDoc used for id
                    sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef(bytes);
                    BytesRef scratch = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    // double/float docvalues are stored as raw bit patterns
                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since its norm was
            // increased:
            Document d_ = searcher.IndexReader.Document(hits[0].Doc);
            // BUGFIX: arguments were in JUnit order (message first), which made NUnit
            // compare the message string against "21" and fail unconditionally.
            Assert.AreEqual("21", d_.Get("id"), "didn't get the right document first");

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
Example #9
0
 /// <summary>
 /// Pairs a file descriptor with its current top line.
 /// NOTE(review): assumes <paramref name="firstLine"/> is the first line read
 /// from the file identified by <paramref name="fd"/> — confirm against callers.
 /// </summary>
 internal FileAndTop(int fd, byte[] firstLine)
 {
     Fd = fd;
     Current = new BytesRef(firstLine);
 }
Example #10
0
        /// <summary>
        /// Builds a nested <see cref="SpanNearQuery"/> ("one thousand" near
        /// "hundred three") and checks both position-range restriction and
        /// payload-check matching against the expected document ids.
        /// </summary>
        public virtual void TestComplexSpanChecks()
        {
            SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
            SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
            //should be one position in between
            SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
            SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));

            // Exact-order near queries, then combine with slop 1
            SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[] { one, thous }, 0, true);
            SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[] { hundred, three }, 0, true);
            SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[] { oneThous, hundredThree }, 1, true);
            SpanQuery query;
            //this one's too small
            query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
            CheckHits(query, new int[] { });
            //this one's just right
            query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
            CheckHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });

            // Payloads written at positions 0, 1, 3 and 4 of the matching spans
            var payloads = new List<byte[]>();
            BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
            BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
            BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8));
            BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8));
            payloads.Add(pay.Bytes);
            payloads.Add(pay2.Bytes);
            payloads.Add(pay3.Bytes);
            payloads.Add(pay4.Bytes);
            query = new SpanNearPayloadCheckQuery(oneThousHunThree, payloads);
            CheckHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
        }
 /// <summary>
 /// Seeks the shared terms enum to ordinal <paramref name="id"/> and exposes the
 /// current term's bytes through <paramref name="result"/>.
 /// NOTE(review): <paramref name="result"/> aliases the enum's internal term
 /// buffer (no copy is made) — confirm callers don't hold it across seeks.
 /// </summary>
 public override void Get(long id, BytesRef result)
 {
     // The previous catch (Exception e) { throw; } was a no-op that only produced
     // an unused-variable warning; exceptions now propagate unchanged.
     TermsEnum_Renamed.SeekExact(id);
     BytesRef term = TermsEnum_Renamed.Term();
     result.Bytes = term.Bytes;
     result.Offset = term.Offset;
     result.Length = term.Length;
 }
Example #12
0
        /// <summary>
        /// Adds a new <see cref="BytesRef"/> to the hash, appending its length-prefixed
        /// bytes to the backing <see cref="ByteBlockPool"/> on first sight.
        /// </summary>
        /// <param name="bytes">
        ///          The bytes to hash </param>
        /// <returns> The id the given bytes are hashed to if there was no mapping for the
        ///         given bytes, otherwise <c>(-(id)-1)</c>. This guarantees
        ///         that the return value will always be &gt;= 0 if the given bytes
        ///         haven't been hashed before.
        /// </returns>
        /// <exception cref="MaxBytesLengthExceededException">
        ///           if the given bytes are longer than
        ///           <see cref="ByteBlockPool.BYTE_BLOCK_SIZE"/> - 2 </exception>
        public int Add(BytesRef bytes)
        {
            Debug.Assert(bytesStart != null, "Bytesstart is null - not initialized");
            int length = bytes.Length;
            // final position
            int hashPos = FindHash(bytes);
            int e       = ids[hashPos];

            if (e == -1)
            {
                // new entry: append "vInt length + bytes" to the pool and record the start
                int len2 = 2 + bytes.Length;
                if (len2 + pool.ByteUpto > ByteBlockPool.BYTE_BLOCK_SIZE)
                {
                    if (len2 > ByteBlockPool.BYTE_BLOCK_SIZE)
                    {
                        throw new MaxBytesLengthExceededException("bytes can be at most " + (ByteBlockPool.BYTE_BLOCK_SIZE - 2) + " in length; got " + bytes.Length);
                    }
                    // current buffer can't hold the entry; advance to a fresh one
                    pool.NextBuffer();
                }
                var buffer     = pool.Buffer;
                int bufferUpto = pool.ByteUpto;
                if (count >= bytesStart.Length)
                {
                    // grow the id -> pool-offset table before assigning a new id
                    bytesStart = bytesStartArray.Grow();
                    Debug.Assert(count < bytesStart.Length + 1, "count: " + count + " len: " + bytesStart.Length);
                }
                e = count++;

                bytesStart[e] = bufferUpto + pool.ByteOffset;

                // We first encode the length, followed by the
                // bytes. Length is encoded as vInt, but will consume
                // 1 or 2 bytes at most (we reject too-long terms,
                // above).
                if (length < 128)
                {
                    // 1 byte to store length
                    buffer[bufferUpto] = (byte)length;
                    pool.ByteUpto     += length + 1;
                    Debug.Assert(length >= 0, "Length must be positive: " + length);
                    System.Buffer.BlockCopy(bytes.Bytes, bytes.Offset, buffer, bufferUpto + 1, length);
                }
                else
                {
                    // 2 byte to store length
                    buffer[bufferUpto]     = (byte)(0x80 | (length & 0x7f));
                    buffer[bufferUpto + 1] = (byte)((length >> 7) & 0xff);
                    pool.ByteUpto         += length + 2;
                    System.Buffer.BlockCopy(bytes.Bytes, bytes.Offset, buffer, bufferUpto + 2, length);
                }
                Debug.Assert(ids[hashPos] == -1);
                ids[hashPos] = e;

                // keep the load factor at <= 0.5: rehash once half the slots are used
                if (count == hashHalfSize)
                {
                    Rehash(2 * hashSize, true);
                }
                return(e);
            }
            // existing entry: encode its id as -(id)-1 so callers can distinguish it
            return(-(e + 1));
        }
Example #13
0
 /// <summary>
 /// Returns <c>true</c> when the pooled bytes stored under <paramref name="id"/>
 /// equal <paramref name="b"/>.
 /// </summary>
 private bool Equals(int id, BytesRef b)
 {
     // Point the scratch ref at the pooled entry for this id, then compare.
     pool.SetBytesRef(scratch1, bytesStart[id]);
     return scratch1.BytesEquals(b);
 }
Example #14
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> ends with the given
 /// <paramref name="suffix"/>.
 /// </summary>
 /// <param name="ref">The <see cref="BytesRef"/> to test.</param>
 /// <param name="suffix">The expected suffix.</param>
 /// <returns><c>true</c> if <paramref name="ref"/> ends with
 /// <paramref name="suffix"/>; otherwise <c>false</c>.</returns>
 public static bool EndsWith(BytesRef @ref, BytesRef suffix)
     => SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
Example #15
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> starts with the given
 /// <paramref name="prefix"/>.
 /// </summary>
 /// <param name="ref">The <see cref="BytesRef"/> to test.</param>
 /// <param name="prefix">The expected prefix.</param>
 /// <returns><c>true</c> if <paramref name="ref"/> starts with
 /// <paramref name="prefix"/>; otherwise <c>false</c>.</returns>
 public static bool StartsWith(BytesRef @ref, BytesRef prefix)
     => SliceEquals(@ref, prefix, 0);
Example #16
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> ends with the given
 /// <paramref name="suffix"/>.
 /// </summary>
 /// <param name="ref">The <see cref="BytesRef"/> to test.</param>
 /// <param name="suffix">The expected suffix.</param>
 /// <returns><c>true</c> if <paramref name="ref"/> ends with
 /// <paramref name="suffix"/>; otherwise <c>false</c>.</returns>
 public static bool EndsWith(BytesRef @ref, BytesRef suffix) // LUCENENET TODO: API - convert to extension method
     => SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
Example #17
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> starts with the given
 /// <paramref name="prefix"/>.
 /// </summary>
 /// <param name="ref">The <see cref="BytesRef"/> to test.</param>
 /// <param name="prefix">The expected prefix.</param>
 /// <returns><c>true</c> if <paramref name="ref"/> starts with
 /// <paramref name="prefix"/>; otherwise <c>false</c>.</returns>
 public static bool StartsWith(BytesRef @ref, BytesRef prefix) // LUCENENET TODO: API - convert to extension method
     => SliceEquals(@ref, prefix, 0);
Example #18
0
            /// <summary>
            /// Advances to the next token; attaches the next payload slice from
            /// <c>Data</c> unless the term text ends with "NO PAYLOAD" or the data
            /// has been exhausted.
            /// </summary>
            public override sealed bool IncrementToken()
            {
                if (!input.IncrementToken())
                {
                    return false;
                }

                // Some values of the same field are to have payloads and others not
                bool sliceAvailable = Offset + Length <= Data.Length;
                if (sliceAvailable && !TermAttribute.ToString().EndsWith("NO PAYLOAD"))
                {
                    PayloadAtt.Payload = new BytesRef(Data, Offset, Length);
                    Offset += Length;
                }
                else
                {
                    PayloadAtt.Payload = null;
                }

                return true;
            }
 /// <summary>
 /// Reads the fixed-length binary value stored at ordinal <paramref name="id"/>
 /// into <paramref name="result"/>, allocating a fresh buffer per call.
 /// </summary>
 public override void Get(long id, BytesRef result)
 {
     // The previous catch (Exception) { throw; } was a no-op and has been removed;
     // any I/O exception from Seek/ReadBytes propagates unchanged.
     long address = Bytes.Offset + id * Bytes.MaxLength;
     Data.Seek(address);
     // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
     // assume "they" own the bytes after calling this!
     var buffer = new byte[Bytes.MaxLength];
     Data.ReadBytes(buffer, 0, buffer.Length);
     result.Bytes = buffer;
     result.Offset = 0;
     result.Length = buffer.Length;
 }
 /// <summary>
 /// Resolves ordinal <paramref name="ord"/> to its term bytes by delegating to
 /// the binary values reader.
 /// </summary>
 public override void LookupOrd(int ord, BytesRef result)
     => Binary.Get(ord, result);
Example #21
0
 /// <summary>
 /// Returns the id of the given <see cref="BytesRef"/>.
 /// </summary>
 /// <param name="bytes">The bytes to look for.</param>
 /// <returns>The id of the given bytes, or <c>-1</c> if there is no mapping for
 /// the given bytes.</returns>
 public int Find(BytesRef bytes)
     => ids[FindHash(bytes)];
 /// <summary>
 /// Initializes the enum: positioned before the first ordinal, with the term
 /// buffer presized to the maximum stored term length (0 when that is unknown).
 /// </summary>
 public TermsEnumAnonymousInnerClassHelper(CompressedBinaryDocValues outerInstance, IndexInput input)
 {
     this.OuterInstance = outerInstance;
     this.Input = input;
     currentOrd = -1; // before the first term
     // A negative MaxLength means "unknown"; start with an empty buffer then.
     termBuffer = new BytesRef(Math.Max(0, outerInstance.Bytes.MaxLength));
     term = new BytesRef();
 }
Example #23
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> starts with the given
 /// <paramref name="prefix"/>.
 /// </summary>
 public static bool StartsWith(this BytesRef @ref, BytesRef prefix) // LUCENENET specific - converted to extension method
     => SliceEquals(@ref, prefix, 0);
Example #24
0
 /// <summary>
 /// Returns a deep copy of <paramref name="other"/>: same field name, with its
 /// own copy of the term bytes.
 /// </summary>
 internal static Term DeepCopyOf(Term other)
     => new Term(other.Field, BytesRef.DeepCopyOf(other.Bytes));
Example #25
0
 /// <summary>
 /// Checks whether <paramref name="ref"/> ends with the given
 /// <paramref name="suffix"/>.
 /// </summary>
 public static bool EndsWith(this BytesRef @ref, BytesRef suffix) // LUCENENET specific - converted to extension method
     => SliceEquals(@ref, suffix, @ref.Length - suffix.Length);
Example #26
0
 /// <summary>
 /// Returns prefix-coded bits after reducing the precision by
 /// <paramref name="shift"/> bits. This method is used by
 /// <seealso cref="NumericTokenStream"/>. After encoding, <c>bytes.Offset</c>
 /// will always be 0.
 /// </summary>
 /// <param name="val"> the numeric value </param>
 /// <param name="shift"> how many bits to strip from the right </param>
 /// <param name="bytes"> will contain the encoded value </param>
 public static void IntToPrefixCoded(int val, int shift, BytesRef bytes)
     => IntToPrefixCodedBytes(val, shift, bytes);
Example #27
0
 /// <summary>
 /// Computes the MurmurHash3 x86 32-bit hash over the valid region of
 /// <paramref name="bytes"/>.
 /// </summary>
 public static int Murmurhash3_x86_32(BytesRef bytes, int seed)
     => Murmurhash3_x86_32(bytes.Bytes, bytes.Offset, bytes.Length, seed);
        /// <summary>
        /// For each old-format index, verifies that the "content" terms enum stays
        /// within its field: after exhausting the single term "aaa", further
        /// Next()/SeekCeil calls must not leak into other fields.
        /// </summary>
        public virtual void TestNextIntoWrongField()
        {
            foreach (string name in OldNames)
            {
                Directory dir = OldIndexDirs[name];
                IndexReader r = DirectoryReader.Open(dir);
                TermsEnum terms = MultiFields.GetFields(r).Terms("content").Iterator(null);
                BytesRef t = terms.Next();
                Assert.IsNotNull(t);

                // content field only has term aaa:
                Assert.AreEqual("aaa", t.Utf8ToString());
                Assert.IsNull(terms.Next());

                BytesRef aaaTerm = new BytesRef("aaa");

                // should be found exactly
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
                Assert.IsNull(terms.Next());

                // should hit end of field
                Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("bbb")));
                Assert.IsNull(terms.Next());

                // should seek to aaa
                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, terms.SeekCeil(new BytesRef("a")));
                Assert.IsTrue(terms.Term().BytesEquals(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
                Assert.IsNull(terms.Next());

                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), terms, null, null, 0)));
                Assert.IsNull(terms.Next());

                r.Dispose();
            }
        }
Example #29
0
        /// <summary>
        /// Writes random bytes into a <see cref="PagedBytes"/> via its DataOutput
        /// (mixing single-byte and chunked writes), freezes it, then verifies that
        /// the DataInput reads the same bytes back and that FillSlice returns
        /// correct slices at random positions.
        /// </summary>
        public virtual void TestDataInputOutput2()
        {
            Random random = Random;

            for (int iter = 0; iter < 5 * RandomMultiplier; iter++)
            {
                int        blockBits = TestUtil.NextInt32(random, 1, 20);
                int        blockSize = 1 << blockBits;
                PagedBytes p         = new PagedBytes(blockBits);
                DataOutput @out      = p.GetDataOutput();
                int        numBytes  = LuceneTestCase.Random.Next(10000000);

                // Fill "answer" with the reference data, then copy it into the
                // PagedBytes using a random mix of single-byte and chunked writes.
                byte[] answer = new byte[numBytes];
                LuceneTestCase.Random.NextBytes(answer);
                int written = 0;
                while (written < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        @out.WriteByte(answer[written++]);
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - written);
                        @out.WriteBytes(answer, written, chunk);
                        written += chunk;
                    }
                }

                PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

                DataInput @in = p.GetDataInput();

                // Read everything back with the same random read-pattern mix
                byte[] verify = new byte[numBytes];
                int    read   = 0;
                while (read < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        verify[read++] = @in.ReadByte();
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - read);
                        @in.ReadBytes(verify, read, chunk);
                        read += chunk;
                    }
                }
                Assert.IsTrue(Arrays.Equals(answer, verify));

                // Random slices (bounded by one block) must match the reference data
                BytesRef slice = new BytesRef();
                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    int pos = random.Next(numBytes - 1);
                    int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
                    reader.FillSlice(slice, pos, len);
                    for (int byteUpto = 0; byteUpto < len; byteUpto++)
                    {
                        Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
                    }
                }
            }
        }
        /// <summary>
        /// Round-trips random 20-char UTF-16 buffers through
        /// <c>UnicodeUtil.UTF16toUTF8</c>/<c>UTF8toUTF16</c>. When the buffer
        /// contains no illegal (unpaired surrogate) sequences, the UTF-8 bytes are
        /// also cross-checked against the BCL encoder.
        /// </summary>
        public virtual void TestRandomUnicodeStrings()
        {
            char[] buffer = new char[20];
            char[] expected = new char[20];

            BytesRef utf8 = new BytesRef(20);
            CharsRef utf16 = new CharsRef(20);

            int num = AtLeast(100000);
            for (int iter = 0; iter < num; iter++)
            {
                // FillUnicode reports whether it emitted an illegal sequence;
                // "expected" holds what the round trip should restore.
                bool hasIllegal = FillUnicode(buffer, expected, 0, 20);

                UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8);
                if (!hasIllegal)
                {
                    // Only well-formed input can be compared with the BCL encoder
                    var b = (new string(buffer, 0, 20)).GetBytes(IOUtils.CHARSET_UTF_8);
                    Assert.AreEqual(b.Length, utf8.Length);
                    for (int i = 0; i < b.Length; i++)
                    {
                        Assert.AreEqual(b[i], utf8.Bytes[i]);
                    }
                }

                // The UTF-8 -> UTF-16 round trip must restore "expected" exactly
                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
                Assert.AreEqual(utf16.Length, 20);
                for (int i = 0; i < 20; i++)
                {
                    Assert.AreEqual(expected[i], utf16.Chars[i]);
                }
            }
        }
Example #31
0
        /// <summary>
        /// Writes random bytes to a file on a real FS directory, copies the file
        /// into a <see cref="PagedBytes"/>, freezes it, and verifies both the
        /// cloned DataInput stream and random FillSlice reads against the
        /// reference data.
        /// </summary>
        public virtual void TestDataInputOutput()
        {
            Random random = Random;

            for (int iter = 0; iter < 5 * RandomMultiplier; iter++)
            {
                BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));
                if (dir is MockDirectoryWrapper)
                {
                    // Throttling would only slow this data-heavy test down
                    ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
                }
                int         blockBits = TestUtil.NextInt32(random, 1, 20);
                int         blockSize = 1 << blockBits;
                PagedBytes  p         = new PagedBytes(blockBits);
                IndexOutput @out      = dir.CreateOutput("foo", IOContext.DEFAULT);
                int         numBytes  = TestUtil.NextInt32(LuceneTestCase.Random, 2, 10000000);

                // Fill "answer" with reference data, then write it to the file
                // with a random mix of single-byte and chunked writes.
                byte[] answer = new byte[numBytes];
                LuceneTestCase.Random.NextBytes(answer);
                int written = 0;
                while (written < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        @out.WriteByte(answer[written++]);
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - written);
                        @out.WriteBytes(answer, written, chunk);
                        written += chunk;
                    }
                }

                @out.Dispose();
                IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
                // Clone before Copy consumes "input", so @in still starts at 0
                DataInput  @in   = (DataInput)input.Clone();

                p.Copy(input, input.Length);
                PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

                // Read everything back with the same random read-pattern mix
                byte[] verify = new byte[numBytes];
                int    read   = 0;
                while (read < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        verify[read++] = @in.ReadByte();
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - read);
                        @in.ReadBytes(verify, read, chunk);
                        read += chunk;
                    }
                }
                Assert.IsTrue(Arrays.Equals(answer, verify));

                // Random slices (bounded by one block) must match the reference data
                BytesRef slice = new BytesRef();
                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    int pos = random.Next(numBytes - 1);
                    int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
                    reader.FillSlice(slice, pos, len);
                    for (int byteUpto = 0; byteUpto < len; byteUpto++)
                    {
                        Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
                    }
                }
                input.Dispose();
                dir.Dispose();
            }
        }
Example #32
0
        /// <summary>
        /// Verifies that <c>BytesRefHash.AddByPoolOffset</c> assigns the same keys as
        /// <c>Add</c> when both hashes share the same byte pool: for every string added
        /// to <c>hash</c>, adding its pool offset to <c>offsetHash</c> must produce the
        /// identical (positive or negative) key and the same stored bytes.
        /// </summary>
        public virtual void TestAddByPoolOffset()
        {
            BytesRef     @ref       = new BytesRef();
            BytesRef     scratch    = new BytesRef();
            // offsetHash shares the pool with the outer `hash`, so pool offsets are valid in both.
            BytesRefHash offsetHash = NewHash(pool);
            int          num        = AtLeast(2);

            for (int j = 0; j < num; j++)
            {
                ISet <string> strings     = new JCG.HashSet <string>();
                int           uniqueCount = 0;
                for (int i = 0; i < 797; i++)
                {
                    string str;
                    do
                    {
                        str = TestUtil.RandomRealisticUnicodeString(Random, 1000);
                    } while (str.Length == 0);
                    @ref.CopyChars(str);
                    int count = hash.Count;
                    int key   = hash.Add(@ref);

                    if (key >= 0)
                    {
                        // New entry: both hashes must report the same new ord.
                        Assert.IsTrue(strings.Add(str));
                        Assert.AreEqual(uniqueCount, key);
                        Assert.AreEqual(hash.Count, count + 1);
                        int offsetKey = offsetHash.AddByPoolOffset(hash.ByteStart(key));
                        Assert.AreEqual(uniqueCount, offsetKey);
                        Assert.AreEqual(offsetHash.Count, count + 1);
                        uniqueCount++;
                    }
                    else
                    {
                        // Duplicate: Add returns (-existingKey) - 1; AddByPoolOffset must agree.
                        Assert.IsFalse(strings.Add(str));
                        Assert.IsTrue((-key) - 1 < count);
                        Assert.AreEqual(str, hash.Get((-key) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, hash.Count);
                        int offsetKey = offsetHash.AddByPoolOffset(hash.ByteStart((-key) - 1));
                        Assert.IsTrue((-offsetKey) - 1 < count);
                        Assert.AreEqual(str, hash.Get((-offsetKey) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, hash.Count);
                    }
                }

                AssertAllIn(strings, hash);
                // Every stored string must be retrievable from offsetHash via the
                // duplicate key returned by hash.Add.
                foreach (string @string in strings)
                {
                    @ref.CopyChars(@string);
                    int      key      = hash.Add(@ref);
                    BytesRef bytesRef = offsetHash.Get((-key) - 1, scratch);
                    Assert.AreEqual(@ref, bytesRef);
                }

                hash.Clear();
                Assert.AreEqual(0, hash.Count);
                offsetHash.Clear();
                Assert.AreEqual(0, offsetHash.Count);
                hash.Reinit(); // init for the next round
                offsetHash.Reinit();
            }
        }
Example #33
0
 /// <summary>
 /// Captures the state needed to iterate a <c>BytesRefArray</c> in sorted order:
 /// the owning array, the comparer used for the sort, a reusable spare buffer,
 /// the element count, and the sorted index permutation.
 /// </summary>
 public BytesRefIteratorAnonymousInnerClassHelper(BytesRefArray outerInstance, IComparer <BytesRef> comp, BytesRef spare, int size, int[] indices)
 {
     OuterInstance = outerInstance;
     Comp = comp;
     Spare = spare;
     Size = size;
     Indices = indices;
     // Iteration cursor starts at the first sorted index.
     pos = 0;
 }
 /// <summary>
 /// Loads the value for document <paramref name="id"/> into <paramref name="Result"/>.
 /// NOTE(review): parameter name `Result` is PascalCase, unlike the camelCase
 /// overrides elsewhere in this file; renaming would break named-argument callers,
 /// so it is left as-is.
 /// </summary>
 public abstract void Get(long id, BytesRef Result);
 /// <summary>
 /// Decompresses the full payload of <paramref name="compressed"/> using the
 /// supplied <paramref name="decompressor"/> and returns exactly the decoded
 /// region as a new array.
 /// </summary>
 internal static byte[] Decompress(Decompressor decompressor, byte[] compressed, int originalLength)
 {
     var decoded = new BytesRef();
     var source = new ByteArrayDataInput(compressed);
     decompressor.Decompress(source, originalLength, 0, originalLength, decoded);
     // The BytesRef may point into a larger backing array; copy out the valid window.
     return Arrays.CopyOfRange(decoded.Bytes, decoded.Offset, decoded.Offset + decoded.Length);
 }
 /// <summary>
 /// Reads the binary value for document <paramref name="id"/> into
 /// <paramref name="result"/>. The value's byte range is derived from the
 /// address table: entry <c>id</c> holds the end offset and entry <c>id - 1</c>
 /// (or 0 for the first document) holds the start offset.
 /// </summary>
 public override void Get(long id, BytesRef result)
 {
     long startAddress = Bytes.Offset + (id == 0 ? 0 : Addresses.Get(id - 1));
     long endAddress = Bytes.Offset + Addresses.Get(id);
     int length = (int)(endAddress - startAddress);
     // Removed a `catch (Exception) { throw; }` wrapper: rethrowing everything
     // unchanged is a no-op that only obscured the read logic.
     Data.Seek(startAddress);
     // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
     // assume "they" own the bytes after calling this!
     var buffer = new byte[length];
     Data.ReadBytes(buffer, 0, buffer.Length);
     result.Bytes = buffer;
     result.Offset = 0;
     result.Length = length;
 }
 /// <summary>
 /// Decompresses the <paramref name="offset"/>/<paramref name="length"/> window
 /// of the original data from <paramref name="compressed"/> using a fresh
 /// decompressor for the configured compression <c>Mode</c>.
 /// </summary>
 internal virtual byte[] Decompress(byte[] compressed, int originalLength, int offset, int length)
 {
     var decompressor = Mode.NewDecompressor();
     var window = new BytesRef();
     decompressor.Decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, window);
     // Copy the decoded window out of the (possibly larger) backing array.
     return Arrays.CopyOfRange(window.Bytes, window.Offset, window.Offset + window.Length);
 }
 /// <summary>
 /// Looks up the ord for <paramref name="key"/>. Compressed binary doc values
 /// provide their own lookup path; anything else falls back to the base
 /// implementation.
 /// </summary>
 public override int LookupTerm(BytesRef key)
 {
     return Binary is CompressedBinaryDocValues compressed
         ? (int)compressed.LookupTerm(key)
         : base.LookupTerm(key);
 }
Example #39
0
 /// <summary>
 /// Seeks to the specified term, if it exists, or to the next (ceiling) term.
 /// </summary>
 /// <param name="text">the target term; it may be before or after the current term</param>
 /// <returns>
 /// a <c>SeekStatus</c> indicating whether the exact term was found, a different
 /// term was found, or EOF was hit. If this returns <c>SeekStatus.END</c>, the
 /// enum is unpositioned.
 /// </returns>
 public abstract SeekStatus SeekCeil(BytesRef text);
 /// <summary>
 /// Seek-based term lookup: returns the term's ord when <paramref name="key"/>
 /// is found, or <c>-(insertionPoint) - 1</c> when it is absent (the binary-search
 /// contract).
 /// </summary>
 internal virtual long LookupTerm(BytesRef key)
 {
     // Removed a `catch (Exception) { throw; }` wrapper: catching every
     // exception only to rethrow it unchanged is a no-op (a leftover from the
     // Java port's checked-exception wrapping).
     TermsEnum.SeekStatus status = TermsEnum_Renamed.SeekCeil(key);
     if (status == TermsEnum.SeekStatus.END)
     {
         // Past the last term: the insertion point is NumValues.
         return -NumValues - 1;
     }
     else if (status == TermsEnum.SeekStatus.FOUND)
     {
         return TermsEnum_Renamed.Ord();
     }
     else
     {
         // Positioned on the next greater term: its ord is the insertion point.
         return -TermsEnum_Renamed.Ord() - 1;
     }
 }
Example #41
0
        /// <summary>
        /// Builds the shared test index: NUM_DOCS documents with descending numeric
        /// fields (long/double/byte/short/int/float counting down from each type's
        /// MaxValue), a sparse field on even docs, and randomly-skipped unicode
        /// string fields whose expected values are recorded in UnicodeStrings and
        /// MultiValued for later verification.
        /// </summary>
        public static void BeforeClass()
        {
            NUM_DOCS = AtLeast(500);
            NUM_ORDS = AtLeast(2);
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            long theLong = long.MaxValue;
            double theDouble = double.MaxValue;
            sbyte theByte = sbyte.MaxValue;
            short theShort = short.MaxValue;
            int theInt = int.MaxValue;
            float theFloat = float.MaxValue;
            UnicodeStrings = new string[NUM_DOCS];
            MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS];
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp");
            }
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                // Each doc stores MaxValue - i for every numeric type (post-decrement).
                doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
                doc.Add(NewStringField("theDouble", Convert.ToString(theDouble--), Field.Store.NO));
                doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
                doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
                doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
                doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
                if (i % 2 == 0)
                {
                    doc.Add(NewStringField("sparse", Convert.ToString(i), Field.Store.NO));
                }

                if (i % 2 == 0)
                {
                    doc.Add(new IntField("numInt", i, Field.Store.NO));
                }

                // sometimes skip the field:
                if (Random().Next(40) != 17)
                {
                    UnicodeStrings[i] = GenerateString(i);
                    doc.Add(NewStringField("theRandomUnicodeString", UnicodeStrings[i], Field.Store.YES));
                }

                // sometimes skip the field:
                if (Random().Next(10) != 8)
                {
                    for (int j = 0; j < NUM_ORDS; j++)
                    {
                        string newValue = GenerateString(i);
                        MultiValued[i, j] = new BytesRef(newValue);
                        doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
                    }
                    Array.Sort(MultiValued[i]);
                }
                writer.AddDocument(doc);
            }
            IndexReader r = writer.Reader;
            // Tests expect a single atomic reader view of the index.
            Reader = SlowCompositeReaderWrapper.Wrap(r);
            writer.Dispose();
        }
                /// <summary>
                /// Seeks to <paramref name="text"/> or the next greater term:
                /// binary-searches the indexed (every Interval-th) terms to find the
                /// containing block, then scans forward within that block.
                /// </summary>
                public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
                {
                    // binary-search just the index values to find the block,
                    // then scan within the block
                    long low = 0;
                    long high = OuterInstance.NumIndexValues - 1;

                    while (low <= high)
                    {
                        // BUGFIX: the previous code computed the midpoint as
                        // (int)((uint)(low + high) >> 1), truncating the 64-bit sum to
                        // 32 bits. This is the Java `(low + high) >>> 1`; do the
                        // unsigned shift at 64-bit width instead.
                        long mid = (long)((ulong)(low + high) >> 1);
                        DoSeek(mid * OuterInstance.Interval);
                        int cmp = termBuffer.CompareTo(text);

                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            // we got lucky, found an indexed term
                            SetTerm();
                            return TermsEnum.SeekStatus.FOUND;
                        }
                    }

                    if (OuterInstance.NumIndexValues == 0)
                    {
                        return TermsEnum.SeekStatus.END;
                    }

                    // block before insertion point
                    long block = low - 1;
                    DoSeek(block < 0 ? -1 : block * OuterInstance.Interval);

                    // Linear scan within the block for the first term >= text.
                    while (DoNext() != null)
                    {
                        int cmp = termBuffer.CompareTo(text);
                        if (cmp == 0)
                        {
                            SetTerm();
                            return TermsEnum.SeekStatus.FOUND;
                        }
                        else if (cmp > 0)
                        {
                            SetTerm();
                            return TermsEnum.SeekStatus.NOT_FOUND;
                        }
                    }

                    return TermsEnum.SeekStatus.END;
                }
Example #43
0
        /// <summary>
        /// Verifies FieldCache's type checking against doc-values fields: each
        /// doc-values type (binary, sorted, numeric, sorted-set) must be retrievable
        /// only through its compatible FieldCache accessors, and every incompatible
        /// accessor must throw InvalidOperationException.
        /// </summary>
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues());
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
            // One document carrying a field of every doc-values type.
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            doc.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet())
            {
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;
            iw.Dispose();
            AtomicReader ar = GetOnlySegmentReader(ir);

            BytesRef scratch = new BytesRef();

            // Binary type: can be retrieved via getTerms()
            try
            {
                FieldCache_Fields.DEFAULT.GetInts(ar, "binary", false);
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: binary is not numeric
            }

            BinaryDocValues binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "binary", true);
            binary.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            try
            {
                FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "binary");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: binary is not sorted
            }

            try
            {
                FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "binary");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: binary is not sorted-set
            }

            try
            {
                new DocTermOrds(ar, null, "binary");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: cannot uninvert a doc-values field
            }

            Bits bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(bits.Get(0));

            // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
            try
            {
                FieldCache_Fields.DEFAULT.GetInts(ar, "sorted", false);
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: sorted is not numeric
            }

            try
            {
                new DocTermOrds(ar, null, "sorted");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: cannot uninvert a doc-values field
            }

            binary = FieldCache_Fields.DEFAULT.GetTerms(ar, "sorted", true);
            binary.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sorted = FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sorted.GetOrd(0));
            Assert.AreEqual(1, sorted.ValueCount);
            sorted.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            // A sorted field is also viewable as a single-valued sorted set.
            SortedSetDocValues sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedSet.Document = 0;
            Assert.AreEqual(0, sortedSet.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
            Assert.AreEqual(1, sortedSet.ValueCount);

            bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(bits.Get(0));

            // Numeric type: can be retrieved via getInts() and so on
            Ints numeric = FieldCache_Fields.DEFAULT.GetInts(ar, "numeric", false);
            Assert.AreEqual(42, numeric.Get(0));

            try
            {
                FieldCache_Fields.DEFAULT.GetTerms(ar, "numeric", true);
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: numeric is not binary
            }

            try
            {
                FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "numeric");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: numeric is not sorted
            }

            try
            {
                FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "numeric");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: numeric is not sorted-set
            }

            try
            {
                new DocTermOrds(ar, null, "numeric");
                Assert.Fail();
            }
            catch (InvalidOperationException expected)
            {
                // expected: cannot uninvert a doc-values field
            }

            bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(bits.Get(0));

            // SortedSet type: can be retrieved via getDocTermOrds()
            if (DefaultCodecSupportsSortedSet())
            {
                try
                {
                    FieldCache_Fields.DEFAULT.GetInts(ar, "sortedset", false);
                    Assert.Fail();
                }
                catch (InvalidOperationException expected)
                {
                    // expected: sorted-set is not numeric
                }

                try
                {
                    FieldCache_Fields.DEFAULT.GetTerms(ar, "sortedset", true);
                    Assert.Fail();
                }
                catch (InvalidOperationException expected)
                {
                    // expected: sorted-set is not binary
                }

                try
                {
                    FieldCache_Fields.DEFAULT.GetTermsIndex(ar, "sortedset");
                    Assert.Fail();
                }
                catch (InvalidOperationException expected)
                {
                    // expected: sorted-set is not (single-valued) sorted
                }

                try
                {
                    new DocTermOrds(ar, null, "sortedset");
                    Assert.Fail();
                }
                catch (InvalidOperationException expected)
                {
                    // expected: cannot uninvert a doc-values field
                }

                sortedSet = FieldCache_Fields.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSet.Document = 0;
                Assert.AreEqual(0, sortedSet.NextOrd());
                Assert.AreEqual(1, sortedSet.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
                Assert.AreEqual(2, sortedSet.ValueCount);

                bits = FieldCache_Fields.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(bits.Get(0));
            }

            ir.Dispose();
            dir.Dispose();
        }
Example #44
0
        /// <summary>
        /// Exercises <c>SpanPayloadCheckQuery</c> and <c>SpanNearPayloadCheckQuery</c>:
        /// matches span (near) queries only when the matched positions carry the exact
        /// expected "pos: N" payloads (payloads presumably written by the test's
        /// analyzer — the indexing side is outside this method).
        /// </summary>
        public virtual void TestSpanPayloadCheck()
        {
            // Single-term payload check: "five" at position 5.
            SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
            BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.CHARSET_UTF_8));
            SpanQuery query = new SpanPayloadCheckQuery(term1, new List<byte[]>() { pay.Bytes });
            CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995 });
            Assert.IsTrue(Searcher.Explain(query, 1125).Value > 0.0f);

            // Two-clause ordered near query with payloads at positions 0 and 1.
            SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
            SpanNearQuery snq;
            SpanQuery[] clauses;
            IList<byte[]> list;
            BytesRef pay2;
            clauses = new SpanQuery[2];
            clauses[0] = term1;
            clauses[1] = term2;
            snq = new SpanNearQuery(clauses, 0, true);
            pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
            pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
            list = new List<byte[]>();
            list.Add(pay.Bytes);
            list.Add(pay2.Bytes);
            query = new SpanNearPayloadCheckQuery(snq, list);
            CheckHits(query, new int[] { 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599 });
            // Three-clause ordered near query; only doc 505 matches all three payloads.
            clauses = new SpanQuery[3];
            clauses[0] = term1;
            clauses[1] = term2;
            clauses[2] = new SpanTermQuery(new Term("field", "five"));
            snq = new SpanNearQuery(clauses, 0, true);
            pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
            pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
            BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.CHARSET_UTF_8));
            list = new List<byte[]>();
            list.Add(pay.Bytes);
            list.Add(pay2.Bytes);
            list.Add(pay3.Bytes);
            query = new SpanNearPayloadCheckQuery(snq, list);
            CheckHits(query, new int[] { 505 });
        }
Example #45
0
 /// <summary>
 /// Override this method if you want to receive the already prefix-encoded
 /// range bounds. You can directly build classical range (inclusive) queries
 /// from them. The default implementation always throws
 /// <see cref="System.NotSupportedException"/>.
 /// </summary>
 public virtual void AddRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded)
 {
     throw new System.NotSupportedException();
 }
Example #46
0
 /// <summary>
 /// Attempts to seek to the exact term, returning <c>true</c> if the term is
 /// found. If this returns <c>false</c>, the enum is unpositioned. For some
 /// codecs, SeekExact may be substantially faster than <see cref="SeekCeil"/>.
 /// </summary>
 public virtual bool SeekExact(BytesRef text)
 {
     // Default implementation: an exact hit is a ceiling seek that lands on the term.
     return SeekCeil(text) == SeekStatus.FOUND;
 }
Example #47
0
 /// <summary>
 /// Accepts only terms whose prefix-coded shift is 0; the first term with any
 /// other shift ends the enumeration.
 /// </summary>
 protected internal override AcceptStatus Accept(BytesRef term)
 {
     if (NumericUtils.GetPrefixCodedIntShift(term) == 0)
     {
         return AcceptStatus.YES;
     }
     return AcceptStatus.END;
 }
Example #48
0
        /// <summary>
        /// Verifies FieldCache caching and retrieval for every field written by the
        /// setup method: repeated requests must return the SAME cached instance, the
        /// cached values must match the MaxValue - i pattern, docsWithField must
        /// reflect field sparsity, and terms/terms-index/doc-term-ords round-trips
        /// must reproduce the recorded unicode strings.
        /// </summary>
        public virtual void Test()
        {
            FieldCache cache = FieldCache_Fields.DEFAULT;
            FieldCache_Fields.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random().NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache_Fields.DEFAULT_DOUBLE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache_Fields.Longs longs = cache.GetLongs(Reader, "theLong", Random().NextBoolean());
            Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetLongs(Reader, "theLong", FieldCache_Fields.DEFAULT_LONG_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache_Fields.Bytes bytes = cache.GetBytes(Reader, "theByte", Random().NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache_Fields.DEFAULT_BYTE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache_Fields.Shorts shorts = cache.GetShorts(Reader, "theShort", Random().NextBoolean());
            Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetShorts(Reader, "theShort", FieldCache_Fields.DEFAULT_SHORT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache_Fields.Ints ints = cache.GetInts(Reader, "theInt", Random().NextBoolean());
            Assert.AreSame(ints, cache.GetInts(Reader, "theInt", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInts(Reader, "theInt", FieldCache_Fields.DEFAULT_INT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache_Fields.Floats floats = cache.GetFloats(Reader, "theFloat", Random().NextBoolean());
            Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", Random().NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetFloats(Reader, "theFloat", FieldCache_Fields.DEFAULT_FLOAT_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }

            // "theLong" is present on every doc, so its docsWithField must be MatchAllBits.
            Bits docsWithField = cache.GetDocsWithField(Reader, "theLong");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits_MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length(); i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            // "sparse" exists only on even docs (see setup), so the bit set alternates.
            docsWithField = cache.GetDocsWithField(Reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits_MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length() == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length() + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length(); i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
            Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();
            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    // ord -1 means the doc has no value (field was randomly skipped in setup).
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            // The ord-indexed values and the TermsEnum must agree, in order.
            TermsEnum tenum = termsIndex.TermsEnum();
            BytesRef val = new BytesRef();
            for (int i = 0; i < nTerms; i++)
            {
                BytesRef val1 = tenum.Next();
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                int k = Random().Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term());
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term());
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
            Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            Bits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(Reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.CacheEntries.Length;
            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.CacheEntries.Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.Document = i;
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                IList<BytesRef> values = new List<BytesRef>(new /*Linked*/HashSet<BytesRef>(Arrays.AsList(MultiValued[i])));
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            // Leave the cache clean for subsequent tests against the same reader.
            FieldCache_Fields.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
        }
Example #49
0
 /// <summary>
 /// Expert: Seeks to a term using a <see cref="TermState"/> previously obtained
 /// from this enum. Callers should retain the <see cref="TermState"/> in order to
 /// use this method. Low-level implementations may position the enum without
 /// re-seeking the term dictionary; this base implementation simply falls back
 /// to a regular exact seek on <paramref name="term"/>.
 /// <para/>
 /// Seeking by <see cref="TermState"/> should only be used iff the state was
 /// obtained from the same <see cref="TermsEnum"/> instance. Using an
 /// incompatible <see cref="TermState"/> might leave this enum in an undefined
 /// state. On a segment level, <see cref="TermState"/> instances are compatible
 /// only iff the source and target enums operate on the same field; they must
 /// not be used across segments.
 /// <para/>
 /// NOTE: a seek by <see cref="TermState"/> might not restore the
 /// <see cref="AttributeSource"/>'s state; maintain such state separately if
 /// this method is used.
 /// </summary>
 /// <param name="term"> the term the <see cref="TermState"/> corresponds to </param>
 /// <param name="state"> the <see cref="TermState"/> to seek to </param>
 /// <exception cref="System.ArgumentException"> if <paramref name="term"/> does not exist </exception>
 public virtual void SeekExact(BytesRef term, TermState state)
 {
     // Default implementation ignores the state and just seeks the term itself.
     if (SeekExact(term))
     {
         return;
     }
     throw new System.ArgumentException("term=" + term + " does not exist");
 }
Example #50
0
        public virtual void TestNonIndexedFields()
        {
            // Index one document whose fields are stored-only (never indexed),
            // so every FieldCache lookup below must yield the default value
            // without creating any cache entries.
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            Document document = new Document();
            foreach (string fieldName in new[] { "bogusbytes", "bogusshorts", "bogusints", "boguslongs", "bogusfloats", "bogusdoubles", "bogusterms", "bogustermsindex", "bogusmultivalued", "bogusbits" })
            {
                document.Add(new StoredField(fieldName, "bogus"));
            }
            writer.AddDocument(document);
            DirectoryReader reader = writer.Reader;
            writer.Dispose();

            AtomicReader atomicReader = GetOnlySegmentReader(reader);

            FieldCache fieldCache = FieldCache_Fields.DEFAULT;
            fieldCache.PurgeAllCaches();
            Assert.AreEqual(0, fieldCache.CacheEntries.Length);

            // Numeric getters: non-indexed fields come back as 0.
            Assert.AreEqual(0, fieldCache.GetBytes(atomicReader, "bogusbytes", true).Get(0));
            Assert.AreEqual(0, fieldCache.GetShorts(atomicReader, "bogusshorts", true).Get(0));
            Assert.AreEqual(0, fieldCache.GetInts(atomicReader, "bogusints", true).Get(0));
            Assert.AreEqual(0, fieldCache.GetLongs(atomicReader, "boguslongs", true).Get(0));
            Assert.AreEqual(0, fieldCache.GetFloats(atomicReader, "bogusfloats", true).Get(0), 0.0f);
            Assert.AreEqual(0, fieldCache.GetDoubles(atomicReader, "bogusdoubles", true).Get(0), 0.0D);

            // Binary getter: the scratch ref is filled with an empty term.
            BytesRef spare = new BytesRef();
            BinaryDocValues binary = fieldCache.GetTerms(atomicReader, "bogusterms", true);
            binary.Get(0, spare);
            Assert.AreEqual(0, spare.Length);

            // Sorted getter: missing value means ord -1 and an empty term.
            SortedDocValues sortedValues = fieldCache.GetTermsIndex(atomicReader, "bogustermsindex");
            Assert.AreEqual(-1, sortedValues.GetOrd(0));
            sortedValues.Get(0, spare);
            Assert.AreEqual(0, spare.Length);

            // Multi-valued getter: the doc has no ords at all.
            SortedSetDocValues multiValues = fieldCache.GetDocTermOrds(atomicReader, "bogusmultivalued");
            multiValues.Document = 0;
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, multiValues.NextOrd());

            Bits docsWithField = fieldCache.GetDocsWithField(atomicReader, "bogusbits");
            Assert.IsFalse(docsWithField.Get(0));

            // None of the lookups above should have populated the cache.
            Assert.AreEqual(0, fieldCache.CacheEntries.Length);
            reader.Dispose();
            dir.Dispose();
        }
Example #51
0
 /// <summary>
 /// Always reports <see cref="SeekStatus.END"/>, regardless of the target term.
 /// </summary>
 public override SeekStatus SeekCeil(BytesRef term) => SeekStatus.END;
Example #52
0
 /// <summary>
 /// Unsupported for this enum; this overload must never be invoked.
 /// </summary>
 /// <exception cref="InvalidOperationException"> always </exception>
 public override void SeekExact(BytesRef term, TermState state)
     => throw new InvalidOperationException("this method should never be called");
Example #53
0
 /// <summary>
 /// Returns prefix coded bits after reducing the precision by <paramref name="shift"/> bits.
 /// This method is used by <seealso cref="NumericTokenStream"/>.
 /// After encoding, <c>bytes.Offset</c> will always be 0. </summary>
 /// <param name="val"> the numeric value </param>
 /// <param name="shift"> how many bits to strip from the right </param>
 /// <param name="bytes"> will contain the encoded value </param>
 public static void LongToPrefixCoded(long val, int shift, BytesRef bytes)
 {
     // Delegates directly to the byte-level encoder.
     LongToPrefixCodedBytes(val, shift, bytes);
 }