BytesRefHash is a special-purpose, hash-map-like data structure optimized for BytesRef instances. BytesRefHash maintains mappings of byte arrays to ids (Map<BytesRef,int>), storing the hashed bytes efficiently in contiguous storage. The mapping to the id is encapsulated inside BytesRefHash, and the id is guaranteed to increase for each added BytesRef.

Note: A BytesRef instance passed to #add(BytesRef) must not be longer than ByteBlockPool#BYTE_BLOCK_SIZE-2 bytes. The internal storage is limited to 2GB of total byte storage.

@lucene.internal
コード例 #1
0
ファイル: TermsQuery.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Initializes the query with its field, the originating query and the term set.
 /// </summary>
 /// <param name="field">Field name forwarded to the base class constructor.</param>
 /// <param name="fromQuery">Query kept in <c>_fromQuery</c>.</param>
 /// <param name="terms">Term set kept in <c>_terms</c>. It is sorted here with
 /// <see cref="BytesRef.UTF8SortedAsUnicodeComparer"/> and the result is kept in <c>_ords</c>.</param>
 internal TermsQuery(string field, Query fromQuery, BytesRefHash terms)
     : base(field)
 {
     // Sort once up front; the returned ordinals are stored next to the term set.
     var sortedOrds = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);

     _terms = terms;
     _ords = sortedOrds;
     _fromQuery = fromQuery;
 }
コード例 #2
0
 /// <summary>
 /// Creates the writer, builds its term hash on top of a tracking byte pool and
 /// adds the initial RAM usage of the pending buffer to <paramref name="iwBytesUsed"/>.
 /// </summary>
 /// <param name="fieldInfo">Field description stored on this writer.</param>
 /// <param name="iwBytesUsed">Counter passed to the allocator and the bytes-start array,
 /// and incremented by the pending buffer's reported RAM usage.</param>
 public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
 {
     this.FieldInfo = fieldInfo;
     this.IwBytesUsed = iwBytesUsed;

     // Build the hash from its parts, in the same order the nested expression would evaluate them.
     var trackingAllocator = new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed);
     var blockPool = new ByteBlockPool(trackingAllocator);
     var bytesStart = new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed);
     Hash = new BytesRefHash(blockPool, BytesRefHash.DEFAULT_CAPACITY, bytesStart);

     Pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);

     // Account for the memory the pending buffer already holds.
     BytesUsed = Pending.RamBytesUsed();
     iwBytesUsed.AddAndGet(BytesUsed);
 }
コード例 #3
0
 /// <summary>
 /// Captures the supplied values; each argument is copied into the field of the same name.
 /// </summary>
 /// <param name="terms">Term hash to store.</param>
 /// <param name="sliceArray">Slice start array to store.</param>
 /// <param name="numTokens">Token count to store.</param>
 /// <param name="numOverlapTokens">Overlap token count to store.</param>
 /// <param name="boost">Boost value to store.</param>
 /// <param name="lastPosition">Last position to store.</param>
 /// <param name="lastOffset">Last offset to store.</param>
 /// <param name="sumTotalTermFreq">Total term frequency sum to store.</param>
 public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq)
 {
     // Term storage.
     this.terms = terms;
     this.sliceArray = sliceArray;

     // Counters and scoring inputs, assigned in parameter order.
     this.numTokens = numTokens;
     this.numOverlapTokens = numOverlapTokens;
     this.boost = boost;
     this.lastPosition = lastPosition;
     this.lastOffset = lastOffset;
     this.sumTotalTermFreq = sumTotalTermFreq;
 }
コード例 #4
0
 /// <summary>
 /// Initializes every field directly from the arguments; nothing is computed here.
 /// </summary>
 /// <param name="field">Value for <c>_field</c>.</param>
 /// <param name="multipleValuesPerDocument">Value for <c>_multipleValuesPerDocument</c>.</param>
 /// <param name="terms">Value for <c>_terms</c>.</param>
 /// <param name="scores">Value for <c>_scores</c>.</param>
 /// <param name="ords">Value for <c>_ords</c>; taken as given, not recomputed.</param>
 /// <param name="originalQuery">Value for <c>_originalQuery</c>.</param>
 /// <param name="unwrittenOriginalQuery">Value for <c>_unwrittenOriginalQuery</c>.</param>
 private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
     float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
 {
     _field = field;
     _multipleValuesPerDocument = multipleValuesPerDocument;

     _terms = terms;
     _scores = scores;
     _ords = ords;

     _originalQuery = originalQuery;
     _unwrittenOriginalQuery = unwrittenOriginalQuery;
 }
コード例 #5
0
 /// <summary>
 /// Initializes the query from a term set and scores, deriving the ordinals by sorting the terms.
 /// </summary>
 /// <param name="field">Value for <c>_field</c>.</param>
 /// <param name="multipleValuesPerDocument">Value for <c>_multipleValuesPerDocument</c>.</param>
 /// <param name="terms">Term set kept in <c>_terms</c>; sorted here with
 /// <see cref="BytesRef.UTF8SortedAsUnicodeComparer"/> to produce <c>_ords</c>.</param>
 /// <param name="scores">Value for <c>_scores</c>.</param>
 /// <param name="originalQuery">Stored as both <c>_originalQuery</c> and <c>_unwrittenOriginalQuery</c>.</param>
 internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
     float[] scores, Query originalQuery)
 {
     // The ordinals come from sorting the term set, not from the caller.
     var sortedOrds = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);

     _field = field;
     _multipleValuesPerDocument = multipleValuesPerDocument;
     _terms = terms;
     _scores = scores;
     _ords = sortedOrds;

     // Both query fields start out pointing at the same instance.
     _originalQuery = originalQuery;
     _unwrittenOriginalQuery = originalQuery;
 }
コード例 #6
0
ファイル: TestBytesRefHash.cs プロジェクト: clieben/lucenenet
        /// <summary>
        /// Re-adds every string to <paramref name="hash"/> and asserts that the entry found at
        /// <c>(-key) - 1</c> equals the string and that the hash's count does not change.
        /// </summary>
        /// <param name="strings">Strings to re-add to the hash.</param>
        /// <param name="hash">Hash under test.</param>
        private void AssertAllIn(ISet <string> strings, BytesRefHash hash)
        {
            int expectedCount = hash.Count;
            var probe = new BytesRef();
            var reusable = new BytesRef();

            foreach (string candidate in strings)
            {
                probe.CopyChars(candidate);

                // Add again to check duplicates.
                int result = hash.Add(probe);
                int existingId = (-result) - 1;

                BytesRef stored = hash.Get(existingId, reusable);
                Assert.AreEqual(candidate, stored.Utf8ToString());
                Assert.AreEqual(expectedCount, hash.Count);
                Assert.IsTrue(result < expectedCount, "key: " + result + " count: " + expectedCount + " string: " + candidate);
            }
        }
コード例 #7
0
ファイル: TestBytesRefHash.cs プロジェクト: clieben/lucenenet
        public virtual void TestAddByPoolOffset()
        {
            // Adds random non-empty strings to Hash and mirrors every entry into a second
            // hash (offsetHash) through AddByPoolOffset, asserting that both hashes report
            // matching keys and contents. The whole scenario is repeated `num` times.
            BytesRef     @ref       = new BytesRef();
            BytesRef     scratch    = new BytesRef();
            // Second hash created from the same Pool argument as passed here.
            BytesRefHash offsetHash = NewHash(Pool);
            int          num        = AtLeast(2);

            for (int j = 0; j < num; j++)
            {
                // Reference set used to decide whether a string has been seen in this round.
                ISet <string> strings     = new JCG.HashSet <string>();
                int           uniqueCount = 0;
                for (int i = 0; i < 797; i++)
                {
                    string str;
                    // Draw random strings until a non-empty one comes up.
                    do
                    {
                        str = TestUtil.RandomRealisticUnicodeString(Random, 1000);
                    } while (str.Length == 0);
                    @ref.CopyChars(str);
                    int count = Hash.Count;
                    int key   = Hash.Add(@ref);

                    if (key >= 0)
                    {
                        // Non-negative key: the test treats this as a newly added entry,
                        // so the reference set must accept it too and the key must equal
                        // the number of unique strings added so far.
                        Assert.IsTrue(strings.Add(str));
                        Assert.AreEqual(uniqueCount, key);
                        // NOTE(review): expected/actual arguments look swapped here and in the
                        // offsetHash.Count check below; only the failure message is affected.
                        Assert.AreEqual(Hash.Count, count + 1);
                        // Mirror the entry into offsetHash using the byte start reported by Hash.
                        int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart(key));
                        Assert.AreEqual(uniqueCount, offsetKey);
                        Assert.AreEqual(offsetHash.Count, count + 1);
                        uniqueCount++;
                    }
                    else
                    {
                        // Negative key: the test treats this as a duplicate and uses
                        // (-key) - 1 as the id of the existing entry.
                        Assert.IsFalse(strings.Add(str));
                        Assert.IsTrue((-key) - 1 < count);
                        Assert.AreEqual(str, Hash.Get((-key) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, Hash.Count);
                        int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart((-key) - 1));
                        Assert.IsTrue((-offsetKey) - 1 < count);
                        // NOTE(review): the two checks below read from Hash rather than
                        // offsetHash even though they follow the offsetHash call — confirm intent.
                        Assert.AreEqual(str, Hash.Get((-offsetKey) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, Hash.Count);
                    }
                }

                AssertAllIn(strings, Hash);
                // Every string must resolve to equal bytes in offsetHash under the id Hash reports.
                foreach (string @string in strings)
                {
                    @ref.CopyChars(@string);
                    int      key      = Hash.Add(@ref);
                    BytesRef bytesRef = offsetHash.Get((-key) - 1, scratch);
                    Assert.AreEqual(@ref, bytesRef);
                }

                // Empty both hashes and verify they report zero entries before reinitializing.
                Hash.Clear();
                Assert.AreEqual(0, Hash.Count);
                offsetHash.Clear();
                Assert.AreEqual(0, offsetHash.Count);
                Hash.Reinit(); // init for the next round
                offsetHash.Reinit();
            }
        }
コード例 #8
0
ファイル: TestBytesRefHash.cs プロジェクト: clieben/lucenenet
 public override void SetUp()
 {
     // Run the base fixture setup first, then create a fresh pool and a hash built on it.
     base.SetUp();

     var freshPool = NewPool();
     Pool = freshPool;
     Hash = NewHash(freshPool);
 }
コード例 #9
0
 public override void SetUp()
 {
     // Run the base fixture setup first, then create a fresh pool and a hash built on it.
     base.SetUp();

     var freshPool = NewPool();
     pool = freshPool;
     hash = NewHash(freshPool);
 }
コード例 #10
0
 /// <summary>
 /// Stores the owning hash, the comparer and the compact id array for later use by the sorter.
 /// </summary>
 /// <param name="outerInstance">The <see cref="BytesRefHash"/> this sorter belongs to.</param>
 /// <param name="comp">Comparer to store.</param>
 /// <param name="compact">Integer array to store.</param>
 public IntroSorterAnonymousInnerClassHelper(BytesRefHash outerInstance, IComparer<BytesRef> comp, int[] compact)
 {
     this.Compact = compact;
     this.Comp = comp;
     this.OuterInstance = outerInstance;
 }
コード例 #11
0
 /// <summary>
 /// Stores the owning hash, the comparer and the compact id array for later use by the sorter.
 /// </summary>
 /// <param name="outerInstance">The <see cref="BytesRefHash"/> this sorter belongs to.</param>
 /// <param name="comp">Comparer to store.</param>
 /// <param name="compact">Integer array to store.</param>
 public IntroSorterAnonymousClass(BytesRefHash outerInstance, IComparer <BytesRef> comp, int[] compact)
 {
     this.compact = compact;
     this.comp = comp;
     this.outerInstance = outerInstance;
 }
コード例 #12
0
ファイル: SynonymMap.cs プロジェクト: Cefa68000/lucenenet
	  /// <summary>
	  /// Creates a synonym map from an already-built FST, its word table and the
	  /// maximum horizontal context value.
	  /// </summary>
	  /// <param name="fst">FST to store.</param>
	  /// <param name="words">Word hash to store.</param>
	  /// <param name="maxHorizontalContext">Value to store as the maximum horizontal context.</param>
	  public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext)
	  {
		this.maxHorizontalContext = maxHorizontalContext;
		this.words = words;
		this.fst = fst;
	  }
コード例 #13
0
ファイル: TermsQuery.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Wraps <paramref name="tenum"/> and prepares the last term and the initial seek term
 /// from the supplied term set and ordinals.
 /// </summary>
 /// <param name="tenum">Terms enum forwarded to the base class constructor.</param>
 /// <param name="terms">Term set the ordinals index into.</param>
 /// <param name="ords">Ordinals used to look up entries in <paramref name="terms"/>.</param>
 internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords)
     : base(tenum)
 {
     Terms = terms;
     Ords = ords;
     _comparator = BytesRef.UTF8SortedAsUnicodeComparer;

     // NOTE(review): with an empty term set this index is -1 and ords[-1] would throw;
     // presumably callers never pass an empty set — confirm.
     int lastIndex = terms.Size() - 1;
     _lastElement = lastIndex;
     _lastTerm = terms.Get(ords[lastIndex], new BytesRef());

     // The first seek target is the term at the current _upto position, read into _spare.
     _seekTerm = terms.Get(ords[_upto], _spare);
 }
コード例 #14
0
 public override void SetUp()
 {
     // Run the base fixture setup first, then create a fresh pool and a hash built on it.
     base.SetUp();

     var freshPool = NewPool();
     Pool = freshPool;
     Hash = NewHash(freshPool);
 }
コード例 #15
0
 /// <summary>
 /// Re-adds every string to <paramref name="hash"/> and asserts that the entry found at
 /// <c>(-key) - 1</c> equals the string and that the hash's size does not change.
 /// </summary>
 /// <param name="strings">Strings to re-add to the hash.</param>
 /// <param name="hash">Hash under test.</param>
 private void AssertAllIn(ISet<string> strings, BytesRefHash hash)
 {
     int expectedSize = hash.Size();
     var probe = new BytesRef();
     var reusable = new BytesRef();

     foreach (string candidate in strings)
     {
         probe.CopyChars(candidate);

         // Add again to check duplicates.
         int result = hash.Add(probe);
         int existingId = (-result) - 1;

         BytesRef stored = hash.Get(existingId, reusable);
         Assert.AreEqual(candidate, stored.Utf8ToString());
         Assert.AreEqual(expectedSize, hash.Size());
         Assert.IsTrue(result < expectedSize, "key: " + result + " count: " + expectedSize + " string: " + candidate);
     }
 }