コード例 #1
0
 /// <summary>
 /// Adds random non-empty strings to <c>Hash</c> and checks that <c>Size()</c> grows by one
 /// when <c>Add</c> returns a non-negative key and stays the same when it returns a negative
 /// key. The hash is cleared and re-initialized at a randomly chosen interval.
 /// </summary>
 public virtual void TestSize()
 {
     BytesRef term = new BytesRef();
     int rounds = AtLeast(2);
     for (int round = 0; round < rounds; round++)
     {
         // Randomly chosen clear interval for this round.
         int clearEvery = 1 + Random().Next(39);
         for (int n = 0; n < 797; n++)
         {
             // Keep drawing until a non-empty string comes back.
             string text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             while (text.Length == 0)
             {
                 text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             }
             term.CopyChars(text);

             int sizeBefore = Hash.Size();
             bool alreadyPresent = Hash.Add(term) < 0;
             int expectedGrowth = alreadyPresent ? 0 : 1;
             Assert.AreEqual(Hash.Size(), sizeBefore + expectedGrowth);

             if (n % clearEvery != 0)
             {
                 continue;
             }

             Hash.Clear();
             Assert.AreEqual(0, Hash.Size());
             Hash.Reinit();
         }
     }
 }
コード例 #2
0
        /// <summary>
        /// Reads a dictionary file line by line, adds every line to an
        /// <c>FSTCompletionBuilder</c> (cycling through the buckets), builds the completion
        /// automaton and saves it to <c>completion.fst</c>.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        public static void Main(string[] args)
        {
            FileInfo input = new FileInfo("/home/dweiss/tmp/shuffled.dict");

            int buckets = 20;
            int shareMaxTail = 10;

            ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
            try
            {
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

                BytesRef scratch = new BytesRef();
                int count = 0;

                // Dispose the reader (and the underlying file stream) as soon as the input is consumed,
                // even when reading or adding a line throws.
                using (TextReader reader =
                    new StreamReader(
                        new FileStream(input.FullName, FileMode.Open), Encoding.UTF8))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        scratch.CopyChars(line);
                        builder.Add(scratch, count % buckets);
                        if ((count++ % 100000) == 0)
                        {
                            Console.WriteLine("Line: " + count);
                        }
                    }
                }

                Console.WriteLine("Building FSTCompletion.");
                FSTCompletion completion = builder.Build();

                FileInfo fstFile = new FileInfo("completion.fst");
                Console.WriteLine("Done. Writing automaton: " + fstFile.FullName);
                completion.FST.Save(fstFile);
            }
            finally
            {
                // Release the sorter on both the success and the failure path.
                sorter.Dispose();
            }
        }
コード例 #3
0
        /// <summary>
        /// Appends random strings to a <c>BytesRefArray</c> and verifies that they can be read
        /// back in order, by random index, and through the iterator. The second round randomly
        /// either continues on the existing content or starts from a cleared list.
        /// </summary>
        public virtual void TestAppend()
        {
            Random random = Random();
            BytesRefArray list = new BytesRefArray(Util.Counter.NewCounter());
            IList<string> stringList = new List<string>();
            for (int j = 0; j < 2; j++)
            {
                if (j > 0 && random.NextBoolean())
                {
                    list.Clear();
                    stringList.Clear();
                }
                int entries = AtLeast(500);
                BytesRef spare = new BytesRef();
                int initSize = list.Size();
                for (int i = 0; i < entries; i++)
                {
                    string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
                    spare.CopyChars(randomRealisticUnicodeString);
                    // Append is expected to return the index of the newly added entry.
                    Assert.AreEqual(i + initSize, list.Append(spare));
                    stringList.Add(randomRealisticUnicodeString);
                }

                // check sequential
                for (int i = 0; i < entries; i++)
                {
                    Assert.IsNotNull(list.Get(spare, i));
                    Assert.AreEqual(stringList[i], spare.Utf8ToString(), "entry " + i + " doesn't match");
                }

                // check random
                for (int i = 0; i < entries; i++)
                {
                    int e = random.Next(entries);
                    Assert.IsNotNull(list.Get(spare, e));
                    // Report the index that was actually checked, not the loop counter.
                    Assert.AreEqual(stringList[e], spare.Utf8ToString(), "entry " + e + " doesn't match");
                }

                // check the iterator twice to make sure it can be obtained repeatedly
                for (int i = 0; i < 2; i++)
                {
                    BytesRefIterator iterator = list.Iterator();
                    foreach (string @string in stringList)
                    {
                        Assert.AreEqual(@string, iterator.Next().Utf8ToString());
                    }
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Adds random non-empty strings to <c>Hash</c> and mirrors them in a <c>HashSet</c>:
        /// a non-negative key must be the next sequential id for a string not seen before, while a
        /// negative key must encode (as <c>-key - 1</c>) the id of an entry that is already stored.
        /// </summary>
        public virtual void TestAdd()
        {
            BytesRef term = new BytesRef();
            BytesRef lookup = new BytesRef();
            int rounds = AtLeast(2);
            for (int round = 0; round < rounds; round++)
            {
                HashSet<string> seen = new HashSet<string>();
                int nextId = 0;
                for (int n = 0; n < 797; n++)
                {
                    // Keep drawing until a non-empty string comes back.
                    string text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
                    while (text.Length == 0)
                    {
                        text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
                    }
                    term.CopyChars(text);
                    int sizeBefore = Hash.Size();
                    int key = Hash.Add(term);

                    if (key < 0)
                    {
                        // Duplicate: the key encodes the id of the existing entry.
                        int existingId = (-key) - 1;
                        Assert.IsFalse(seen.Add(text));
                        Assert.IsTrue(existingId < sizeBefore);
                        Assert.AreEqual(text, Hash.Get(existingId, lookup).Utf8ToString());
                        Assert.AreEqual(sizeBefore, Hash.Size());
                        continue;
                    }

                    // New entry: ids are handed out sequentially.
                    Assert.IsTrue(seen.Add(text));
                    Assert.AreEqual(nextId, key);
                    Assert.AreEqual(Hash.Size(), sizeBefore + 1);
                    nextId++;
                }

                AssertAllIn(seen, Hash);
                Hash.Clear();
                Assert.AreEqual(0, Hash.Size());
                Hash.Reinit();
            }
        }
コード例 #5
0
 /// <summary>
 /// Appends random values to a <c>ByteBlockPool</c>, reads them back from consecutive
 /// positions and compares them with the values written, then resets the pool and checks the
 /// byte counter against the <c>reuseFirst</c> choice.
 /// </summary>
 public virtual void TestReadAndWrite()
 {
     Counter bytesUsed = Util.Counter.NewCounter();
     ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
     pool.NextBuffer();
     bool reuseFirst = Random().NextBoolean();
     for (int round = 0; round < 2; round++)
     {
         IList<BytesRef> written = new List<BytesRef>();
         int maxLength = AtLeast(500);
         int numValues = AtLeast(100);
         BytesRef buffer = new BytesRef();
         for (int n = 0; n < numValues; n++)
         {
             string text = TestUtil.RandomRealisticUnicodeString(Random(), maxLength);
             written.Add(new BytesRef(text));
             buffer.CopyChars(text);
             pool.Append(buffer);
         }

         // Read every value back from where it was appended and compare.
         long readPosition = 0;
         foreach (BytesRef expected in written)
         {
             int length = expected.Length;
             buffer.Grow(length);
             buffer.Length = length;
             pool.ReadBytes(readPosition, buffer.Bytes, buffer.Offset, length);
             Assert.AreEqual(expected, buffer);
             readPosition += length;
         }

         pool.Reset(Random().NextBoolean(), reuseFirst);
         if (!reuseFirst)
         {
             Assert.AreEqual(0, bytesUsed.Get());
             pool.NextBuffer(); // prepare for the next iteration
         }
         else
         {
             Assert.AreEqual(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed.Get());
         }
     }
 }
コード例 #6
0
        // LUCENENET specific - renaming from Main() because we must only have 1 entry point.
        // Not sure why this utility is in a test project anyway - this seems like something that should
        // be in Lucene.Net.Suggest so we can put it into the lucene-cli tool.
        /// <summary>
        /// Reads a dictionary file line by line, adds every line to an
        /// <c>FSTCompletionBuilder</c> (cycling through the buckets), builds the completion
        /// automaton and saves it to <c>completion.fst</c>.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        public static void Main2(string[] args)
        {
            FileInfo input = new FileInfo("/home/dweiss/tmp/shuffled.dict");

            int buckets      = 20;
            int shareMaxTail = 10;

            ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
            try
            {
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

                BytesRef scratch = new BytesRef();
                int      count   = 0;

                // Dispose the reader (and the underlying file stream) as soon as the input is consumed,
                // even when reading or adding a line throws.
                using (TextReader reader =
                    new StreamReader(
                        new FileStream(input.FullName, FileMode.Open), Encoding.UTF8))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        scratch.CopyChars(line);
                        builder.Add(scratch, count % buckets);
                        if ((count++ % 100000) == 0)
                        {
                            Console.WriteLine("Line: " + count);
                        }
                    }
                }

                Console.WriteLine("Building FSTCompletion.");
                FSTCompletion completion = builder.Build();

                FileInfo fstFile = new FileInfo("completion.fst");

                Console.WriteLine("Done. Writing automaton: " + fstFile.FullName);
                completion.FST.Save(fstFile);
            }
            finally
            {
                // Release the sorter on both the success and the failure path.
                sorter.Dispose();
            }
        }
コード例 #7
0
            /// <summary>
            /// Reads the next line from the outer instance's reader into <c>spare</c> and exposes
            /// it as <c>current</c>. When the reader is exhausted, the enumerator is marked done,
            /// the reader is disposed and <c>current</c> is set to <c>null</c>.
            /// </summary>
            /// <returns>
            /// <c>true</c> if a line was read; <c>false</c> if the input is exhausted or the
            /// enumerator was already done.
            /// </returns>
            public bool MoveNext()
            {
                if (done)
                {
                    return false;
                }

                bool success = false;
                bool hasNext = true;

                try
                {
                    string line;
                    if ((line = outerInstance.@in.ReadLine()) != null)
                    {
                        spare.CopyChars(line);
                        current = spare;
                    }
                    else
                    {
                        done = true;
                        IOUtils.Dispose(outerInstance.@in);
                        current = null;
                        hasNext = false;
                    }
                    success = true;
                }
                finally
                {
                    // If reading failed, still dispose the reader without masking the original exception.
                    if (!success)
                    {
                        IOUtils.DisposeWhileHandlingException(outerInstance.@in);
                    }
                }
                return hasNext;
            }
コード例 #8
0
        /// <summary>
        /// Appends random strings to a <c>BytesRefArray</c> and verifies that the iterator obtained
        /// with <c>BytesRef.UTF8SortedAsUTF16Comparer</c> returns them in the same order as the
        /// sorted reference list.
        /// </summary>
        public virtual void TestSort()
        {
            Random random = Random();
            BytesRefArray list = new BytesRefArray(Util.Counter.NewCounter());
            List<string> stringList = new List<string>();

            for (int j = 0; j < 2; j++)
            {
                if (j > 0 && random.NextBoolean())
                {
                    list.Clear();
                    stringList.Clear();
                }
                int entries = AtLeast(500);
                BytesRef spare = new BytesRef();
                int initSize = list.Size();
                for (int i = 0; i < entries; i++)
                {
                    string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
                    spare.CopyChars(randomRealisticUnicodeString);
                    Assert.AreEqual(initSize + i, list.Append(spare));
                    stringList.Add(randomRealisticUnicodeString);
                }

                // Sort by UTF-16 code unit (ordinal) order. The parameterless Sort() uses the
                // culture-sensitive default string comparer, which does not agree with the
                // UTF8SortedAsUTF16 comparer for arbitrary Unicode input.
                stringList.Sort(StringComparer.Ordinal);
                BytesRefIterator iter = list.Iterator(BytesRef.UTF8SortedAsUTF16Comparer);
                int a = 0;
                while ((spare = iter.Next()) != null)
                {
                    Assert.AreEqual(stringList[a], spare.Utf8ToString(), "entry " + a + " doesn't match");
                    a++;
                }
                // An exhausted iterator must keep returning null.
                Assert.IsNull(iter.Next());
                Assert.AreEqual(a, stringList.Count);
            }
        }
コード例 #9
0
        /// <summary>
        /// Indexes documents that carry a random binary doc value (committing at random points),
        /// force-merges to one segment and checks that the values seen through
        /// <c>MultiDocValues</c> on the pre-merge reader equal those of the merged segment.
        /// </summary>
        public virtual void TestBinary()
        {
            Directory directory = NewDirectory();
            Document document = new Document();
            BytesRef value = new BytesRef();
            // The field keeps a reference to `value`, so refilling it changes what gets indexed next.
            document.Add(new BinaryDocValuesField("bytes", value));

            IndexWriterConfig config = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            config.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);

            int numDocs = AtLeast(500);
            int added = 0;
            while (added < numDocs)
            {
                value.CopyChars(TestUtil.RandomUnicodeString(Random()));
                writer.AddDocument(document);
                bool commitNow = Random().Next(17) == 0;
                if (commitNow)
                {
                    writer.Commit();
                }
                added++;
            }

            // Reader taken before the merge, and a second one after merging down to one segment.
            DirectoryReader multiReader = writer.Reader;
            writer.ForceMerge(1);
            DirectoryReader mergedReader = writer.Reader;
            AtomicReader merged = GetOnlySegmentReader(mergedReader);
            writer.Dispose();

            BinaryDocValues multi = MultiDocValues.GetBinaryValues(multiReader, "bytes");
            BinaryDocValues single = merged.GetBinaryDocValues("bytes");
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int docId = 0; docId < numDocs; docId++)
            {
                single.Get(docId, expected);
                multi.Get(docId, actual);
                Assert.AreEqual(expected, actual);
            }

            multiReader.Dispose();
            mergedReader.Dispose();
            directory.Dispose();
        }
コード例 #10
0
ファイル: FunctionValues.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Copies the string value of the given document, as returned by <c>StrVal</c>, into
 /// <paramref name="target"/>.
 /// TODO: should this return the indexed raw bytes instead?
 /// </summary>
 /// <param name="doc">The document to read the value for.</param>
 /// <param name="target">Receives the value; its length is set to 0 when there is no value.</param>
 /// <returns><c>true</c> if <c>StrVal</c> returned a non-null string; otherwise <c>false</c>.</returns>
 public virtual bool BytesVal(int doc, BytesRef target)
 {
     string text = StrVal(doc);
     if (text != null)
     {
         target.CopyChars(text);
         return true;
     }

     target.Length = 0;
     return false;
 }
コード例 #11
0
 /// <summary>
 /// Re-adds every string to <paramref name="hash"/> and asserts that each one is found under
 /// the id encoded in the returned key (<c>-key - 1</c>) and that the size does not change.
 /// </summary>
 /// <param name="strings">The strings expected to be stored in the hash already.</param>
 /// <param name="hash">The hash to check.</param>
 private void AssertAllIn(ISet<string> strings, BytesRefHash hash)
 {
     BytesRef term = new BytesRef();
     BytesRef lookup = new BytesRef();
     int expectedSize = hash.Size();
     foreach (string expected in strings)
     {
         term.CopyChars(expected);
         int key = hash.Add(term); // add again to check duplicates
         int existingId = (-key) - 1;
         BytesRef stored = hash.Get(existingId, lookup);
         Assert.AreEqual(expected, stored.Utf8ToString());
         Assert.AreEqual(expectedSize, hash.Size());
         Assert.IsTrue(key < expectedSize, "key: " + key + " count: " + expectedSize + " string: " + expected);
     }
 }
コード例 #12
0
 /// <summary>
 /// Adds random non-empty strings to <c>Hash</c> and verifies that
 /// <c>Hash.Sort(BytesRef.UTF8SortedAsUTF16Comparer)</c> yields the ids in the same order as a
 /// sorted reference set of the same strings.
 /// </summary>
 public virtual void TestSort()
 {
     BytesRef @ref = new BytesRef();
     int num = AtLeast(2);
     for (int j = 0; j < num; j++)
     {
         // Order the reference set by UTF-16 code units (ordinal). The default SortedSet<string>
         // comparer is culture-sensitive and does not agree with the UTF8SortedAsUTF16 comparer
         // for arbitrary Unicode input.
         SortedSet<string> strings = new SortedSet<string>(StringComparer.Ordinal);
         for (int k = 0; k < 797; k++)
         {
             string str;
             do
             {
                 str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             } while (str.Length == 0);
             @ref.CopyChars(str);
             Hash.Add(@ref);
             strings.Add(str);
         }
         // We use the UTF-16 comparer here, because we need to be able to
         // compare to the ordinal (UTF-16 code unit) order of the reference set:
         int[] sort = Hash.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
         Assert.IsTrue(strings.Count < sort.Length);
         int i = 0;
         BytesRef scratch = new BytesRef();
         foreach (string @string in strings)
         {
             @ref.CopyChars(@string);
             Assert.AreEqual(@ref, Hash.Get(sort[i++], scratch));
         }
         Hash.Clear();
         Assert.AreEqual(0, Hash.Size());
         Hash.Reinit();
     }
 }
コード例 #13
0
 /// <summary>
 /// Adds random non-empty strings to <c>Hash</c>, remembers the id returned for every new
 /// string and verifies that <c>Hash.Get</c> returns the original bytes for each remembered id.
 /// </summary>
 public virtual void TestGet()
 {
     BytesRef @ref = new BytesRef();
     BytesRef scratch = new BytesRef();
     int num = AtLeast(2);
     for (int j = 0; j < num; j++)
     {
         // Maps each distinct string to the id the hash assigned to it. The values are never
         // null, so a plain int is used instead of int?.
         Dictionary<string, int> strings = new Dictionary<string, int>();
         int uniqueCount = 0;
         for (int i = 0; i < 797; i++)
         {
             string str;
             do
             {
                 str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             } while (str.Length == 0);
             @ref.CopyChars(str);
             int count = Hash.Size();
             int key = Hash.Add(@ref);
             if (key >= 0)
             {
                 // New entry: ids are handed out sequentially.
                 Assert.IsFalse(strings.ContainsKey(str));
                 strings[str] = key;
                 Assert.AreEqual(uniqueCount, key);
                 uniqueCount++;
                 Assert.AreEqual(Hash.Size(), count + 1);
             }
             else
             {
                 // Duplicate: the key encodes the id of the existing entry as -key - 1.
                 Assert.IsTrue((-key) - 1 < count);
                 Assert.AreEqual(Hash.Size(), count);
             }
         }
         foreach (KeyValuePair<string, int> entry in strings)
         {
             @ref.CopyChars(entry.Key);
             Assert.AreEqual(@ref, Hash.Get(entry.Value, scratch));
         }
         Hash.Clear();
         Assert.AreEqual(0, Hash.Size());
         Hash.Reinit();
     }
 }
コード例 #14
0
 /// <summary>
 /// Adds random non-empty strings to <c>Hash</c> while tracking the assigned ids in a bit set,
 /// then checks that the first <c>numEntries</c> slots returned by <c>Hash.Compact()</c> cover
 /// exactly the tracked ids.
 /// </summary>
 public virtual void TestCompact()
 {
     BytesRef term = new BytesRef();
     int rounds = AtLeast(2);
     for (int round = 0; round < rounds; round++)
     {
         const int size = 797;
         int added = 0;
         BitArray seenIds = new BitArray(size);
         for (int n = 0; n < size; n++)
         {
             // Keep drawing until a non-empty string comes back.
             string text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             while (text.Length == 0)
             {
                 text = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
             }
             term.CopyChars(text);
             int key = Hash.Add(term);
             if (key >= 0)
             {
                 // New entry: its id must not have been seen before.
                 Assert.IsFalse(seenIds.SafeGet(key));
                 seenIds.SafeSet(key, true);
                 added++;
             }
             else
             {
                 // Duplicate: the encoded id must already be marked.
                 Assert.IsTrue(seenIds.SafeGet((-key) - 1));
             }
         }

         Assert.AreEqual(Hash.Size(), seenIds.Cardinality());
         Assert.AreEqual(added, seenIds.Cardinality());
         Assert.AreEqual(added, Hash.Size());

         int[] compact = Hash.Compact();
         Assert.IsTrue(added < compact.Length);
         // Clearing the bit of every compacted id must leave no bit set.
         for (int slot = 0; slot < added; slot++)
         {
             seenIds.SafeSet(compact[slot], false);
         }
         Assert.AreEqual(0, seenIds.Cardinality());

         Hash.Clear();
         Assert.AreEqual(0, Hash.Size());
         Hash.Reinit();
     }
 }
コード例 #15
0
        /// <summary>
        /// Adds random non-empty strings to <c>Hash</c> and, for every add, registers the same
        /// entry in a second hash created over <c>Pool</c> via <c>AddByPoolOffset</c>. Verifies
        /// that both hashes hand out the same ids, report the same sizes and return the same bytes.
        /// </summary>
        public virtual void TestAddByPoolOffset()
        {
            BytesRef @ref = new BytesRef();
            BytesRef scratch = new BytesRef();
            BytesRefHash offsetHash = NewHash(Pool);
            int num = AtLeast(2);
            for (int j = 0; j < num; j++)
            {
                HashSet<string> strings = new HashSet<string>();
                int uniqueCount = 0;
                for (int i = 0; i < 797; i++)
                {
                    string str;
                    do
                    {
                        str = TestUtil.RandomRealisticUnicodeString(Random(), 1000);
                    } while (str.Length == 0);
                    @ref.CopyChars(str);
                    int count = Hash.Size();
                    int key = Hash.Add(@ref);

                    if (key >= 0)
                    {
                        // New entry: both hashes must assign the next sequential id.
                        Assert.IsTrue(strings.Add(str));
                        Assert.AreEqual(uniqueCount, key);
                        Assert.AreEqual(Hash.Size(), count + 1);
                        int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart(key));
                        Assert.AreEqual(uniqueCount, offsetKey);
                        Assert.AreEqual(offsetHash.Size(), count + 1);
                        uniqueCount++;
                    }
                    else
                    {
                        // Duplicate: the key encodes the id of the existing entry as -key - 1.
                        Assert.IsFalse(strings.Add(str));
                        Assert.IsTrue((-key) - 1 < count);
                        Assert.AreEqual(str, Hash.Get((-key) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, Hash.Size());
                        int offsetKey = offsetHash.AddByPoolOffset(Hash.ByteStart((-key) - 1));
                        Assert.IsTrue((-offsetKey) - 1 < count);
                        // Check the offset hash itself here; the primary hash was already
                        // verified just above.
                        Assert.AreEqual(str, offsetHash.Get((-offsetKey) - 1, scratch).Utf8ToString());
                        Assert.AreEqual(count, offsetHash.Size());
                    }
                }

                AssertAllIn(strings, Hash);
                foreach (string @string in strings)
                {
                    @ref.CopyChars(@string);
                    int key = Hash.Add(@ref);
                    BytesRef bytesRef = offsetHash.Get((-key) - 1, scratch);
                    Assert.AreEqual(@ref, bytesRef);
                }

                Hash.Clear();
                Assert.AreEqual(0, Hash.Size());
                offsetHash.Clear();
                Assert.AreEqual(0, offsetHash.Size());
                Hash.Reinit(); // init for the next round
                offsetHash.Reinit();
            }
        }