/// <summary>
/// Verifies that <c>BytesRefHash.Size()</c> is stable when a duplicate is added,
/// grows by exactly one for a new entry, and resets to zero after <c>Clear()</c>.
/// </summary>
public virtual void TestSize()
{
    BytesRef @ref = new BytesRef();
    int num = AtLeast(2);
    for (int j = 0; j < num; j++)
    {
        // Clear at a randomized interval so Clear()/Reinit() are exercised mid-run.
        int mod = 1 + Random().Next(39);
        for (int i = 0; i < 797; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            @ref.CopyChars(str);
            int count = Hash.Size();
            int key = Hash.Add(@ref);
            if (key < 0)
            {
                // Duplicate entry: size must not change.
                // NOTE: expected value passed first (NUnit convention) so failure
                // messages read correctly; the original had the arguments reversed.
                Assert.AreEqual(count, Hash.Size());
            }
            else
            {
                // New entry: size must grow by exactly one.
                Assert.AreEqual(count + 1, Hash.Size());
            }
            if (i % mod == 0)
            {
                Hash.Clear();
                Assert.AreEqual(0, Hash.Size());
                Hash.Reinit();
            }
        }
    }
}
/// <summary>
/// Builds an FSTCompletion automaton from a newline-delimited dictionary file
/// and writes it to "completion.fst" in the working directory.
/// </summary>
public static void Main(string[] args)
{
    FileInfo input = new FileInfo("/home/dweiss/tmp/shuffled.dict");
    int buckets = 20;
    int shareMaxTail = 10;

    ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
    try
    {
        FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

        // Dispose the reader (and its underlying FileStream) deterministically;
        // the original leaked both, and always on an exception while reading.
        using (TextReader reader = new StreamReader(
            new FileStream(input.FullName, FileMode.Open), Encoding.UTF8))
        {
            BytesRef scratch = new BytesRef();
            string line;
            int count = 0;
            while ((line = reader.ReadLine()) != null)
            {
                scratch.CopyChars(line);
                // Weights are assigned round-robin; this is a build-throughput
                // utility, not a ranking test.
                builder.Add(scratch, count % buckets);
                if ((count++ % 100000) == 0)
                {
                    Console.WriteLine("Line: " + count);
                }
            }
        }

        Console.WriteLine("Building FSTCompletion.");
        FSTCompletion completion = builder.Build();

        FileInfo fstFile = new FileInfo("completion.fst");
        Console.WriteLine("Done. Writing automaton: " + fstFile.FullName);
        completion.FST.Save(fstFile);
    }
    finally
    {
        // Clean up the sorter's temporary files even if building/saving throws.
        sorter.Dispose();
    }
}
/// <summary>
/// Appends random strings to a BytesRefArray and verifies that Append hands out
/// sequential indices, that Get round-trips every entry (sequentially and at
/// random positions), and that iteration is repeatable in insertion order.
/// </summary>
public virtual void TestAppend()
{
    Random rnd = Random();
    BytesRefArray array = new BytesRefArray(Util.Counter.NewCounter());
    IList<string> expected = new List<string>();
    for (int round = 0; round < 2; round++)
    {
        // Sometimes restart the second round from an empty array.
        if (round > 0 && rnd.NextBoolean())
        {
            array.Clear();
            expected.Clear();
        }
        int entries = AtLeast(500);
        BytesRef scratch = new BytesRef();
        int initSize = array.Size();
        for (int i = 0; i < entries; i++)
        {
            string value = TestUtil.RandomRealisticUnicodeString(rnd);
            scratch.CopyChars(value);
            Assert.AreEqual(i + initSize, array.Append(scratch));
            expected.Add(value);
        }
        // Sequential round-trip of every entry added this round.
        for (int i = 0; i < entries; i++)
        {
            Assert.IsNotNull(array.Get(scratch, i));
            Assert.AreEqual(expected[i], scratch.Utf8ToString(), "entry " + i + " doesn't match");
        }
        // check random
        for (int i = 0; i < entries; i++)
        {
            int e = rnd.Next(entries);
            Assert.IsNotNull(array.Get(scratch, e));
            Assert.AreEqual(expected[e], scratch.Utf8ToString(), "entry " + i + " doesn't match");
        }
        // Two passes: a fresh iterator must replay the same sequence.
        for (int pass = 0; pass < 2; pass++)
        {
            BytesRefIterator it = array.Iterator();
            foreach (string value in expected)
            {
                Assert.AreEqual(value, it.Next().Utf8ToString());
            }
        }
    }
}
/// <summary>
/// Adds random strings to the hash, checking that a non-negative key means a
/// genuinely new entry (sequential ids, size bump) and a negative key encodes
/// the id of the existing duplicate, which must round-trip through Get.
/// </summary>
public virtual void TestAdd()
{
    BytesRef bytes = new BytesRef();
    BytesRef scratch = new BytesRef();
    int rounds = AtLeast(2);
    for (int j = 0; j < rounds; j++)
    {
        HashSet<string> seen = new HashSet<string>();
        int uniqueCount = 0;
        for (int i = 0; i < 797; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            bytes.CopyChars(str);
            int sizeBefore = Hash.Size();
            int key = Hash.Add(bytes);
            if (key < 0)
            {
                // Duplicate: (-key) - 1 decodes the id of the existing entry.
                Assert.IsFalse(seen.Add(str));
                Assert.IsTrue((-key) - 1 < sizeBefore);
                Assert.AreEqual(str, Hash.Get((-key) - 1, scratch).Utf8ToString());
                Assert.AreEqual(sizeBefore, Hash.Size());
            }
            else
            {
                // New entry: ids are handed out sequentially.
                Assert.IsTrue(seen.Add(str));
                Assert.AreEqual(uniqueCount, key);
                Assert.AreEqual(Hash.Size(), sizeBefore + 1);
                uniqueCount++;
            }
        }
        AssertAllIn(seen, Hash);
        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        Hash.Reinit();
    }
}
/// <summary>
/// Appends random byte sequences to a ByteBlockPool, reads them back by
/// absolute position, then checks that Reset honors the <c>reuseFirst</c> flag
/// in the tracked byte accounting.
/// </summary>
public virtual void TestReadAndWrite()
{
    Counter bytesUsed = Util.Counter.NewCounter();
    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    pool.NextBuffer();
    bool reuseFirst = Random().NextBoolean();
    for (int j = 0; j < 2; j++)
    {
        IList<BytesRef> list = new List<BytesRef>();
        int maxLength = AtLeast(500);
        int numValues = AtLeast(100);
        BytesRef @ref = new BytesRef();
        for (int i = 0; i < numValues; i++)
        {
            string value = TestUtil.RandomRealisticUnicodeString(Random(), maxLength);
            // Keep an independent copy for verification; @ref itself is reused scratch.
            list.Add(new BytesRef(value));
            @ref.CopyChars(value);
            pool.Append(@ref);
        }
        // verify
        long position = 0;
        foreach (BytesRef expected in list)
        {
            @ref.Grow(expected.Length);
            @ref.Length = expected.Length;
            // position is the absolute pool offset; values were appended back to back.
            pool.ReadBytes(position, @ref.Bytes, @ref.Offset, @ref.Length);
            Assert.AreEqual(expected, @ref);
            position += @ref.Length;
        }
        pool.Reset(Random().NextBoolean(), reuseFirst);
        if (reuseFirst)
        {
            // One buffer is retained, so exactly one block stays accounted for.
            Assert.AreEqual(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed.Get());
        }
        else
        {
            // Everything was released; re-acquire a buffer for the next round.
            Assert.AreEqual(0, bytesUsed.Get());
            pool.NextBuffer(); // prepare for next iter
        }
    }
}
// LUCENENET specific - renaming from Main() because we must only have 1 entry point.
// Not sure why this utility is in a test project anyway - this seems like something that should
// be in Lucene.Net.Suggest so we can put it into the lucene-cli tool.
/// <summary>
/// Builds an FSTCompletion automaton from a newline-delimited dictionary file
/// and writes it to "completion.fst" in the working directory.
/// </summary>
public static void Main2(string[] args)
{
    FileInfo input = new FileInfo("/home/dweiss/tmp/shuffled.dict");
    int buckets = 20;
    int shareMaxTail = 10;

    ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
    try
    {
        FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

        // Dispose the reader (and its underlying FileStream) deterministically;
        // the original leaked both, and always on an exception while reading.
        using (TextReader reader = new StreamReader(
            new FileStream(input.FullName, FileMode.Open), Encoding.UTF8))
        {
            BytesRef scratch = new BytesRef();
            string line;
            int count = 0;
            while ((line = reader.ReadLine()) != null)
            {
                scratch.CopyChars(line);
                // Weights are assigned round-robin; this is a build-throughput
                // utility, not a ranking test.
                builder.Add(scratch, count % buckets);
                if ((count++ % 100000) == 0)
                {
                    Console.WriteLine("Line: " + count);
                }
            }
        }

        Console.WriteLine("Building FSTCompletion.");
        FSTCompletion completion = builder.Build();

        FileInfo fstFile = new FileInfo("completion.fst");
        Console.WriteLine("Done. Writing automaton: " + fstFile.FullName);
        completion.FST.Save(fstFile);
    }
    finally
    {
        // Clean up the sorter's temporary files even if building/saving throws.
        sorter.Dispose();
    }
}
/// <summary>
/// Advances to the next line from the wrapped reader. On end of input the
/// reader is disposed, <c>current</c> is cleared, and false is returned; if
/// reading throws, the reader is disposed without masking the original exception.
/// </summary>
public bool MoveNext()
{
    if (done)
    {
        return false;
    }
    bool success = false;
    bool hasNext = true;
    try
    {
        string line;
        // NOTE(review): the receiver here was garbled in the source
        // ("[email protected]()"); restored to outerInstance.@in.ReadLine(),
        // the same reader the two dispose calls below operate on - confirm
        // against upstream.
        if ((line = outerInstance.@in.ReadLine()) != null)
        {
            // Reuse the spare BytesRef to avoid a per-line allocation.
            spare.CopyChars(line);
            current = spare;
        }
        else
        {
            // End of input: latch 'done' so subsequent calls return immediately.
            done = true;
            IOUtils.Dispose(outerInstance.@in);
            current = null;
            hasNext = false;
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            // ReadLine threw: close the reader while suppressing any secondary
            // exception so the original one propagates.
            IOUtils.DisposeWhileHandlingException(outerInstance.@in);
        }
    }
    return hasNext;
}
/// <summary>
/// Appends random strings, then verifies that iterating with the
/// UTF8-sorted-as-UTF16 comparer yields exactly the order produced by the
/// CLR's default string sort (UTF-16 order), covering every entry.
/// </summary>
public virtual void TestSort()
{
    Random rnd = Random();
    BytesRefArray array = new BytesRefArray(Util.Counter.NewCounter());
    List<string> expected = new List<string>();
    for (int round = 0; round < 2; round++)
    {
        // Sometimes restart the second round from an empty array.
        if (round > 0 && rnd.NextBoolean())
        {
            array.Clear();
            expected.Clear();
        }
        int entries = AtLeast(500);
        BytesRef scratch = new BytesRef();
        int initSize = array.Size();
        for (int i = 0; i < entries; i++)
        {
            string value = TestUtil.RandomRealisticUnicodeString(rnd);
            scratch.CopyChars(value);
            Assert.AreEqual(initSize + i, array.Append(scratch));
            expected.Add(value);
        }
        expected.Sort();
        BytesRefIterator sortedIter = array.Iterator(BytesRef.UTF8SortedAsUTF16Comparer);
        int idx = 0;
        while ((scratch = sortedIter.Next()) != null)
        {
            Assert.AreEqual(expected[idx], scratch.Utf8ToString(), "entry " + idx + " doesn't match");
            idx++;
        }
        // The iterator must be exhausted and must have covered every entry.
        Assert.IsNull(sortedIter.Next());
        Assert.AreEqual(idx, expected.Count);
    }
}
/// <summary>
/// Indexes random binary doc values across multiple segments, then checks that
/// MultiDocValues' merged view over the multi-segment reader returns the same
/// per-document values as the single segment produced by ForceMerge(1).
/// </summary>
public virtual void TestBinary()
{
    Directory dir = NewDirectory();
    Document doc = new Document();
    BytesRef @ref = new BytesRef();
    Field field = new BinaryDocValuesField("bytes", @ref);
    doc.Add(field);
    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        // The same Document/Field instance is reused; mutating @ref changes the
        // value that the next AddDocument indexes.
        @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
        iw.AddDocument(doc);
        // Occasional commits force multiple segments so the multi-reader path
        // is actually exercised.
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }
    // Open the multi-segment view BEFORE merging, then a single-segment view after.
    DirectoryReader ir = iw.Reader;
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();
    BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes");
    BinaryDocValues single = merged.GetBinaryDocValues("bytes");
    BytesRef actual = new BytesRef();
    BytesRef expected = new BytesRef();
    // Per-doc comparison: merged single-segment values are the ground truth.
    for (int i = 0; i < numDocs; i++)
    {
        single.Get(i, expected);
        multi.Get(i, actual);
        Assert.AreEqual(expected, actual);
    }
    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
/// <summary>
/// Copies the UTF-8 bytes of this document's string value into
/// <paramref name="target"/> - TODO: should this return the indexed raw bytes not?
/// </summary>
/// <param name="doc"> document id whose value is fetched via StrVal </param>
/// <param name="target"> receives the bytes; its Length is zeroed when there is no value </param>
/// <returns> true if a value existed, false otherwise </returns>
public virtual bool BytesVal(int doc, BytesRef target)
{
    string stringValue = StrVal(doc);
    if (stringValue != null)
    {
        target.CopyChars(stringValue);
        return true;
    }
    target.Length = 0;
    return false;
}
/// <summary>
/// Asserts that every string in <paramref name="strings"/> is already present in
/// <paramref name="hash"/>: re-adding must return a negative (duplicate) key whose
/// decoded id round-trips to the same string, without changing the hash's size.
/// </summary>
private void AssertAllIn(ISet<string> strings, BytesRefHash hash)
{
    BytesRef bytes = new BytesRef();
    BytesRef scratch = new BytesRef();
    int size = hash.Size();
    foreach (string s in strings)
    {
        bytes.CopyChars(s);
        int key = hash.Add(bytes); // add again to check duplicates
        Assert.AreEqual(s, hash.Get((-key) - 1, scratch).Utf8ToString());
        Assert.AreEqual(size, hash.Size());
        Assert.IsTrue(key < size, "key: " + key + " count: " + size + " string: " + s);
    }
}
/// <summary>
/// Fills the hash with random strings and verifies that Sort with the
/// UTF8-sorted-as-UTF16 comparer returns ids in the same order as a
/// SortedSet&lt;string&gt;, which sorts via String.CompareTo (UTF-16).
/// </summary>
public virtual void TestSort()
{
    BytesRef bytes = new BytesRef();
    int rounds = AtLeast(2);
    for (int j = 0; j < rounds; j++)
    {
        SortedSet<string> expected = new SortedSet<string>();
        for (int k = 0; k < 797; k++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            bytes.CopyChars(str);
            Hash.Add(bytes);
            expected.Add(str);
        }
        // We use the UTF-16 comparator here, because we need to be able to
        // compare to native String.CompareTo() [UTF-16]:
        int[] sort = Hash.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
        // The returned ids array is oversized relative to the unique-entry count.
        Assert.IsTrue(expected.Count < sort.Length);
        int i = 0;
        BytesRef scratch = new BytesRef();
        foreach (string s in expected)
        {
            bytes.CopyChars(s);
            Assert.AreEqual(bytes, Hash.Get(sort[i++], scratch));
        }
        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        Hash.Reinit();
    }
}
/// <summary>
/// Verifies Get(): every key handed out by Add retrieves exactly the bytes that
/// were added under it, and duplicate adds neither grow the hash nor hand out
/// new keys.
/// </summary>
public virtual void TestGet()
{
    BytesRef bytes = new BytesRef();
    BytesRef scratch = new BytesRef();
    int rounds = AtLeast(2);
    for (int j = 0; j < rounds; j++)
    {
        IDictionary<string, int?> keyByString = new Dictionary<string, int?>();
        int uniqueCount = 0;
        for (int i = 0; i < 797; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            bytes.CopyChars(str);
            int sizeBefore = Hash.Size();
            int key = Hash.Add(bytes);
            if (key < 0)
            {
                // Duplicate: encoded id must be valid and size must be unchanged.
                Assert.IsTrue((-key) - 1 < sizeBefore);
                Assert.AreEqual(Hash.Size(), sizeBefore);
            }
            else
            {
                // New entry: remember its key for the lookup check below.
                Assert.IsFalse(keyByString.ContainsKey(str));
                keyByString[str] = Convert.ToInt32(key);
                Assert.AreEqual(uniqueCount, key);
                uniqueCount++;
                Assert.AreEqual(Hash.Size(), sizeBefore + 1);
            }
        }
        // Every recorded key must resolve back to its original string's bytes.
        foreach (KeyValuePair<string, int?> entry in keyByString)
        {
            bytes.CopyChars(entry.Key);
            Assert.AreEqual(bytes, Hash.Get((int)entry.Value, scratch));
        }
        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        Hash.Reinit();
    }
}
/// <summary>
/// Verifies Compact(): after adding entries, the first Size() slots of the
/// compacted ids array must contain exactly the set of keys that Add handed
/// out, each exactly once.
/// </summary>
public virtual void TestCompact()
{
    BytesRef bytes = new BytesRef();
    int rounds = AtLeast(2);
    for (int j = 0; j < rounds; j++)
    {
        int numEntries = 0;
        const int size = 797;
        BitArray seenKeys = new BitArray(size);
        for (int i = 0; i < size; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            bytes.CopyChars(str);
            int key = Hash.Add(bytes);
            if (key < 0)
            {
                // Duplicate: its decoded id must already be marked.
                Assert.IsTrue(seenKeys.SafeGet((-key) - 1));
            }
            else
            {
                // Fresh key: must not have been handed out before.
                Assert.IsFalse(seenKeys.SafeGet(key));
                seenKeys.SafeSet(key, true);
                numEntries++;
            }
        }
        Assert.AreEqual(Hash.Size(), seenKeys.Cardinality());
        Assert.AreEqual(numEntries, seenKeys.Cardinality());
        Assert.AreEqual(numEntries, Hash.Size());
        int[] compact = Hash.Compact();
        // The backing ids array is larger than the number of live entries.
        Assert.IsTrue(numEntries < compact.Length);
        // Clear each compacted key; afterwards no bit may remain set.
        for (int i = 0; i < numEntries; i++)
        {
            seenKeys.SafeSet(compact[i], false);
        }
        Assert.AreEqual(0, seenKeys.Cardinality());
        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        Hash.Reinit();
    }
}
/// <summary>
/// Verifies AddByPoolOffset(): a second hash sharing the same ByteBlockPool,
/// fed the primary hash's byte-start offsets, must hand out the same ids for
/// new entries and the same negative duplicate encoding as the primary hash.
/// </summary>
public virtual void TestAddByPoolOffset()
{
    BytesRef @ref = new BytesRef();
    BytesRef scratch = new BytesRef();
    // Second hash over the same pool, keyed by pool offset rather than by bytes.
    BytesRefHash offsetHash = NewHash(Pool);
    int num = AtLeast(2);
    for (int j = 0; j < num; j++)
    {
        HashSet<string> strings = new HashSet<string>();
        int uniqueCount = 0;
        for (int i = 0; i < 797; i++)
        {
            string str;
            do
            {
                str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
            } while (str.Length == 0);
            @ref.CopyChars(str);
            int count = Hash.Size();
            int key = Hash.Add(@ref);
            if (key >= 0)
            {
                // New entry: both hashes must assign the same sequential id.
                Assert.IsTrue(strings.Add(str));
                Assert.AreEqual(uniqueCount, key);
                Assert.AreEqual(Hash.Size(), count + 1);
                int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart(key));
                Assert.AreEqual(uniqueCount, offsetKey);
                Assert.AreEqual(offsetHash.Size(), count + 1);
                uniqueCount++;
            }
            else
            {
                // Duplicate: both hashes must report the same existing id,
                // decoded via (-key) - 1.
                Assert.IsFalse(strings.Add(str));
                Assert.IsTrue((-key) - 1 < count);
                Assert.AreEqual(str, Hash.Get((-key) - 1, scratch).Utf8ToString());
                Assert.AreEqual(count, Hash.Size());
                int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart((-key) - 1));
                Assert.IsTrue((-offsetKey) - 1 < count);
                Assert.AreEqual(str, Hash.Get((-offsetKey) - 1, scratch).Utf8ToString());
                Assert.AreEqual(count, Hash.Size());
            }
        }
        AssertAllIn(strings, Hash);
        // Re-adding each string is a duplicate; its decoded id must resolve to
        // the same bytes through the offset-keyed hash as well.
        foreach (string @string in strings)
        {
            @ref.CopyChars(@string);
            int key = Hash.Add(@ref);
            BytesRef bytesRef = offsetHash.Get((-key) - 1, scratch);
            Assert.AreEqual(@ref, bytesRef);
        }
        Hash.Clear();
        Assert.AreEqual(0, Hash.Size());
        offsetHash.Clear();
        Assert.AreEqual(0, offsetHash.Size());
        Hash.Reinit(); // init for the next round
        offsetHash.Reinit();
    }
}