Example #1
0
        /// <summary>
        /// Checks that <c>HasWord</c> only reports a word once that exact word has been added,
        /// and that previously added words remain findable as the trie grows.
        /// </summary>
        public void Can_find_exact()
        {
            Word match;
            var trie = new LcrsTrie('\0', false);

            // Nothing has been added yet, so the lookup must fail.
            Assert.False(trie.HasWord("xxx", out match));

            trie.Add("xxx");

            // Only "xxx" is present.
            Assert.True(trie.HasWord("xxx", out match));
            Assert.False(trie.HasWord("baby", out match));
            Assert.False(trie.HasWord("dad", out match));

            trie.Add("baby");

            // "xxx" and "baby" are present, "dad" is still missing.
            Assert.True(trie.HasWord("xxx", out match));
            Assert.True(trie.HasWord("baby", out match));
            Assert.False(trie.HasWord("dad", out match));

            trie.Add("dad");

            // All three words are present.
            Assert.True(trie.HasWord("xxx", out match));
            Assert.True(trie.HasWord("baby", out match));
            Assert.True(trie.HasWord("dad", out match));
        }
Example #2
0
        /// <summary>
        /// Exercises <c>SemanticallyNear</c> on an in-memory trie while words are added,
        /// checking which words are returned for different values of the second argument.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the second argument looks like a tolerance (larger values admit more
        /// words in the assertions below) - confirm against the SemanticallyNear implementation.
        /// </remarks>
        public void Can_find_near()
        {
            var trie = new LcrsTrie('\0', false);

            // An empty trie yields no matches.
            var matches = trie.SemanticallyNear("ba", 1).Select(word => word.Value).ToList();

            Assert.IsFalse(matches.Any());

            trie.Add("bad");
            matches = trie.SemanticallyNear("ba", 1).Select(word => word.Value).ToList();

            Assert.AreEqual(1, matches.Count);
            Assert.IsTrue(matches.Contains("bad"));

            // "baby" is not expected to show up for ("ba", 1).
            trie.Add("baby");
            matches = trie.SemanticallyNear("ba", 1).Select(word => word.Value).ToList();

            Assert.AreEqual(1, matches.Count);
            Assert.IsTrue(matches.Contains("bad"));

            trie.Add("b");
            matches = trie.SemanticallyNear("ba", 1).Select(word => word.Value).ToList();

            Assert.AreEqual(2, matches.Count);
            Assert.IsTrue(matches.Contains("bad"));
            Assert.IsTrue(matches.Contains("b"));

            // Raising the second argument to 2 brings "baby" in as well.
            matches = trie.SemanticallyNear("ba", 2).Select(word => word.Value).ToList();

            Assert.AreEqual(3, matches.Count);
            Assert.IsTrue(matches.Contains("b"));
            Assert.IsTrue(matches.Contains("bad"));
            Assert.IsTrue(matches.Contains("baby"));

            // With 0 nothing is returned; "ba" itself was never added.
            matches = trie.SemanticallyNear("ba", 0).Select(word => word.Value).ToList();

            Assert.AreEqual(0, matches.Count);

            trie.Add("bananas");
            matches = trie.SemanticallyNear("ba", 6).Select(word => word.Value).ToList();

            Assert.AreEqual(4, matches.Count);
            Assert.IsTrue(matches.Contains("b"));
            Assert.IsTrue(matches.Contains("bad"));
            Assert.IsTrue(matches.Contains("baby"));
            Assert.IsTrue(matches.Contains("bananas"));

            matches = trie.SemanticallyNear("bazy", 1).Select(word => word.Value).ToList();

            Assert.AreEqual(1, matches.Count);
            Assert.IsTrue(matches.Contains("baby"));

            trie.Add("bank");
            matches = trie.SemanticallyNear("bazy", 3).Select(word => word.Value).ToList();

            Assert.AreEqual(4, matches.Count);
            Assert.IsTrue(matches.Contains("baby"));
            Assert.IsTrue(matches.Contains("bank"));
            Assert.IsTrue(matches.Contains("bad"));
            Assert.IsTrue(matches.Contains("b"));
        }
Example #3
0
        /// <summary>
        /// Exercises <c>Near</c> on an in-memory trie while words are added, checking which
        /// words are returned for different values of the second argument.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the second argument looks like a tolerance (larger values admit more
        /// words in the assertions below) - confirm against the Near implementation.
        /// </remarks>
        public void Can_find_near()
        {
            var tree = new LcrsTrie('\0', false);

            // An empty trie yields no matches.
            var near = tree.Near("ba", 1).Select(w => w.Value).ToList();

            Assert.That(near, Is.Empty);

            tree.Add("bad");
            near = tree.Near("ba", 1).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(1));
            Assert.IsTrue(near.Contains("bad"));

            // "baby" is not expected to show up for ("ba", 1).
            tree.Add("baby");
            near = tree.Near("ba", 1).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(1));
            Assert.IsTrue(near.Contains("bad"));

            tree.Add("b");
            near = tree.Near("ba", 1).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(2));
            Assert.IsTrue(near.Contains("bad"));
            Assert.IsTrue(near.Contains("b"));

            // Raising the second argument to 2 brings "baby" in as well.
            near = tree.Near("ba", 2).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(3));
            Assert.IsTrue(near.Contains("b"));
            Assert.IsTrue(near.Contains("bad"));
            Assert.IsTrue(near.Contains("baby"));

            // With 0 nothing is returned; "ba" itself was never added.
            near = tree.Near("ba", 0).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(0));

            tree.Add("bananas");
            near = tree.Near("ba", 6).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(4));
            Assert.IsTrue(near.Contains("b"));
            Assert.IsTrue(near.Contains("bad"));
            Assert.IsTrue(near.Contains("baby"));
            Assert.IsTrue(near.Contains("bananas"));

            near = tree.Near("bazy", 1).Select(w => w.Value).ToList();

            Assert.That(near.Count, Is.EqualTo(1));
            Assert.IsTrue(near.Contains("baby"));

            tree.Add("bank");
            near = tree.Near("bazy", 3).Select(w => w.Value).ToList();

            // Same constraint-style count check as every other count assertion in this test.
            Assert.That(near.Count, Is.EqualTo(4));
            Assert.IsTrue(near.Contains("baby"));
            Assert.IsTrue(near.Contains("bank"));
            Assert.IsTrue(near.Contains("bad"));
            Assert.IsTrue(near.Contains("b"));
        }
Example #4
0
        /// <summary>
        /// Checks that <c>IsWord</c> yields a result only for words that have been added,
        /// and that previously added words remain findable as the trie grows.
        /// </summary>
        public void Can_find_exact()
        {
            var tree = new LcrsTrie('\0', false);

            // Nothing has been added yet, so the lookup must come back empty.
            Assert.IsFalse(tree.IsWord("xxx").Any());

            tree.Add("xxx");

            // Only "xxx" is present.
            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsFalse(tree.IsWord("baby").Any());
            Assert.IsFalse(tree.IsWord("dad").Any());

            tree.Add("baby");

            // "xxx" and "baby" are present, "dad" is still missing.
            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsTrue(tree.IsWord("baby").Any());
            Assert.IsFalse(tree.IsWord("dad").Any());

            tree.Add("dad");

            // All three words are present.
            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsTrue(tree.IsWord("baby").Any());
            Assert.IsTrue(tree.IsWord("dad").Any());
        }
Example #5
0
        /// <summary>
        /// Checks the value of <c>Weight</c> after two batches of words have been added.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the expected values equal the number of distinct character nodes plus one
        /// (7 + 1, then 14 + 1) - confirm that this is what Weight is meant to measure.
        /// </remarks>
        public void Can_get_weight()
        {
            var trie = new LcrsTrie('\0', false);

            foreach (var term in new[] { "pap", "papp", "papaya" })
            {
                trie.Add(term);
            }

            Assert.AreEqual(8, trie.Weight);

            foreach (var term in new[] { "ape", "apelsin" })
            {
                trie.Add(term);
            }

            Assert.AreEqual(15, trie.Weight);
        }
Example #6
0
        /// <summary>
        /// Adds "baby" and then "bad" and checks the resulting node layout: the shared "ba" prefix,
        /// with 'd' as the left child below it and 'b' (followed by 'y') as that node's right sibling.
        /// </summary>
        public void Can_append()
        {
            var trie = new LcrsTrie('\0', false);

            trie.Add("baby");
            trie.Add("bad");

            // Walk down the shared prefix one node at a time.
            var first = trie.LeftChild;
            Assert.That(first.Value, Is.EqualTo('b'));

            var second = first.LeftChild;
            Assert.That(second.Value, Is.EqualTo('a'));

            var third = second.LeftChild;
            Assert.That(third.Value, Is.EqualTo('d'));

            // The branch for "baby" hangs off the sibling of the 'd' node.
            var sibling = third.RightSibling;
            Assert.That(sibling.Value, Is.EqualTo('b'));
            Assert.That(sibling.LeftChild.Value, Is.EqualTo('y'));

            Assert.True(trie.HasWord("baby"));
            Assert.True(trie.HasWord("bad"));
        }
Example #7
0
        /// <summary>
        /// Serializes a trie to disk and checks that <c>MappedTrieReader.Range("azz", "xerox")</c>
        /// returns "banana", "bananas" and "xanax", in that order.
        /// </summary>
        public void Can_find_within_range()
        {
            var triePath = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_within_range.tri");
            var trie = new LcrsTrie();

            foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas", "xanax", "xxx" })
            {
                trie.Add(term);
            }

            // Give every end-of-word node a postings address before serializing.
            foreach (var endNode in trie.EndOfWordNodes())
            {
                endNode.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            // Dump a visualization of the trie next to the test output.
            File.WriteAllText("Can_find_within_range.log", trie.Visualize(), Encoding.UTF8);

            trie.Serialize(triePath);

            IList<Word> inRange;

            using (var reader = new MappedTrieReader(triePath))
            {
                // Materialize the result while the reader is still open.
                inRange = reader.Range("azz", "xerox").ToList();
            }

            Assert.AreEqual(3, inRange.Count);
            Assert.AreEqual("banana", inRange[0].Value);
            Assert.AreEqual("bananas", inRange[1].Value);
            Assert.AreEqual("xanax", inRange[2].Value);
        }
Example #8
0
        /// <summary>
        /// Checks that <c>GreaterThan("ape")</c> returns the four other words in the trie,
        /// in the order "app", "apple", "banana", "bananas".
        /// </summary>
        public void Can_find_greater_than()
        {
            var lexicon = new LcrsTrie();

            foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas" })
            {
                lexicon.Add(term);
            }

            var greater = lexicon.GreaterThan("ape").ToList();

            // "ape" itself is not part of the expected result.
            Assert.AreEqual(4, greater.Count);
            Assert.AreEqual("app", greater[0]);
            Assert.AreEqual("apple", greater[1]);
            Assert.AreEqual("banana", greater[2]);
            Assert.AreEqual("bananas", greater[3]);
        }
Example #9
0
        /// <summary>
        /// Adds "baby" and then "bad" and checks the resulting node layout: the shared "ba" prefix,
        /// with 'b' (followed by 'y') as the left child below it and 'd' as that node's right sibling.
        /// </summary>
        public void Can_append()
        {
            var root = new LcrsTrie('\0', false);

            root.Add("baby");
            root.Add("bad");

            // Shared prefix "ba", with the later-added 'd' linked as a right sibling.
            Assert.AreEqual('b', root.LeftChild.Value);
            Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
            Assert.AreEqual('d', root.LeftChild.LeftChild.LeftChild.RightSibling.Value);

            // The remainder of "baby" continues down the left-child chain.
            Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            Assert.IsTrue(root.IsWord("baby").Any());
            Assert.IsTrue(root.IsWord("bad").Any());
        }
        /// <summary>
        /// Marks every stored document whose hash matches one of the pending keys in <c>_pks</c>
        /// as obsolete and rewrites the affected index and doc-hash files.
        /// </summary>
        /// <remarks>
        /// For each index in <c>_ixs</c> the doc-hash file is streamed into a temporary copy. That copy,
        /// together with a re-serialized index file, replaces the original only when at least one
        /// document was newly marked obsolete. The temporary doc-hash file is removed in either case.
        /// </remarks>
        public void Commit()
        {
            // Hash strings of the keys to delete, stored in a trie for lookup by document hash.
            var deleteSet = new LcrsTrie();

            foreach (var value in _pks)
            {
                var hashString = value.ToHash().ToString(CultureInfo.InvariantCulture);

                deleteSet.Add(hashString);
            }

            foreach (var ix in _ixs)
            {
                var docHashFileName    = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk"));
                var tmpDocHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk.tmp"));

                var tmpIxFileName = Path.Combine(_directory, ix.VersionId + ".ix.tmp");
                var ixFileName    = Path.Combine(_directory, ix.VersionId + ".ix");

                var deleted = 0;

                using (var stream = new FileStream(tmpDocHashFileName, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    foreach (var document in Serializer.DeserializeDocHashes(docHashFileName))
                    {
                        var hash = document.Hash.ToString(CultureInfo.InvariantCulture);

                        // Only documents that are not already obsolete count as newly deleted.
                        if (!document.IsObsolete && deleteSet.IsWord(hash).Any())
                        {
                            document.IsObsolete = true;
                            deleted++;
                        }

                        // Every document is written to the temporary file, changed or not.
                        document.Serialize(stream);
                    }
                }

                if (deleted > 0)
                {
                    ix.DocumentCount -= deleted;
                    ix.Serialize(tmpIxFileName);

                    File.Copy(tmpIxFileName, ixFileName, overwrite: true);
                    File.Copy(tmpDocHashFileName, docHashFileName, overwrite: true);

                    File.Delete(tmpIxFileName);
                }

                // The temporary doc-hash file is created for every index, so remove it even when
                // nothing was deleted; otherwise it would be left behind on disk.
                File.Delete(tmpDocHashFileName);
            }
        }
Example #11
0
        /// <summary>
        /// Adds two words with different first letters ("baby", "dad") and checks that each one
        /// forms its own chain: 'b' as the root's left child and 'd' as that node's right sibling.
        /// </summary>
        public void Can_build_two_legs()
        {
            Word match;
            var trie = new LcrsTrie('\0', false);

            trie.Add("baby");
            trie.Add("dad");

            var babyLeg = trie.LeftChild;
            var dadLeg = babyLeg.RightSibling;

            Assert.That(dadLeg.Value, Is.EqualTo('d'));
            Assert.That(babyLeg.LeftChild.Value, Is.EqualTo('a'));
            Assert.That(dadLeg.LeftChild.LeftChild.Value, Is.EqualTo('d'));

            Assert.That(babyLeg.Value, Is.EqualTo('b'));
            Assert.That(dadLeg.LeftChild.Value, Is.EqualTo('a'));
            Assert.That(babyLeg.LeftChild.LeftChild.Value, Is.EqualTo('b'));
            Assert.That(babyLeg.LeftChild.LeftChild.LeftChild.Value, Is.EqualTo('y'));

            Assert.True(trie.HasWord("baby", out match));
            Assert.True(trie.HasWord("dad", out match));
        }
Example #12
0
        /// <summary>
        /// Adds two words with different first letters ("baby", "dad") and checks that each one
        /// forms its own chain: 'b' as the root's left child and 'd' as that node's right sibling.
        /// </summary>
        public void Can_build_two_legs()
        {
            var root = new LcrsTrie('\0', false);

            root.Add("baby");
            root.Add("dad");

            Assert.AreEqual('d', root.LeftChild.RightSibling.Value);
            Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
            Assert.AreEqual('d', root.LeftChild.RightSibling.LeftChild.LeftChild.Value);

            Assert.AreEqual('b', root.LeftChild.Value);
            Assert.AreEqual('a', root.LeftChild.RightSibling.LeftChild.Value);
            Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            // NOTE(review): if IsWord returns a sequence rather than a single word, a non-null check
            // passes even when nothing matched - confirm the return type and assert on .Any() if so.
            Assert.IsNotNull(root.IsWord("baby"));
            Assert.IsNotNull(root.IsWord("dad"));
        }
Example #13
0
        /// <summary>
        /// Serializes a trie to a file, reads it back through <c>Serializer.DeserializeTrie</c>
        /// and checks that every word is found both before and after the round trip.
        /// </summary>
        public void Can_deserialize_whole_file()
        {
            var triePath = Path.Combine(Dir, "Can_serialize_whole_file.tri");
            var trie = new LcrsTrie('\0', false);
            var terms = new[] { "baby", "bad", "bank", "box", "dad", "dance" };

            foreach (var term in terms)
            {
                trie.Add(term);
            }

            Word match;

            // The in-memory trie must know every word before it is written out.
            foreach (var term in terms)
            {
                Assert.IsTrue(trie.HasWord(term, out match));
            }

            trie.Serialize(triePath);

            var restored = Serializer.DeserializeTrie(Dir, new FileInfo(triePath).Name);

            // The trie rebuilt from disk must know the same words.
            foreach (var term in terms)
            {
                Assert.IsTrue(restored.HasWord(term, out match));
            }
        }
Example #14
0
        /// <summary>
        /// Merges a second trie into a first one and checks that the first then contains
        /// the words of both, including a word ("banana") that was present in each.
        /// </summary>
        public void Can_append_tries()
        {
            var target = new LcrsTrie('\0', false);

            foreach (var term in new[] { "ape", "app", "banana" })
            {
                target.Add(term);
            }

            var addition = new LcrsTrie('\0', false);

            foreach (var term in new[] { "apple", "banana" })
            {
                addition.Add(term);
            }

            target.Merge(addition);

            Word match;

            Assert.IsTrue(target.HasWord("ape", out match));
            Assert.IsTrue(target.HasWord("app", out match));
            Assert.IsTrue(target.HasWord("apple", out match));
            Assert.IsTrue(target.HasWord("banana", out match));
        }
Example #15
0
        /// <summary>
        /// Adds a single word and checks that its characters form one chain of left children
        /// below the root.
        /// </summary>
        public void Can_build_one_leg()
        {
            var trie = new LcrsTrie('\0', false);

            trie.Add("baby");

            // Step down the left-child chain one character at a time.
            var node = trie.LeftChild;
            Assert.That(node.Value, Is.EqualTo('b'));

            node = node.LeftChild;
            Assert.That(node.Value, Is.EqualTo('a'));

            node = node.LeftChild;
            Assert.That(node.Value, Is.EqualTo('b'));

            node = node.LeftChild;
            Assert.That(node.Value, Is.EqualTo('y'));

            Assert.True(trie.HasWord("baby"));
        }
Example #16
0
        /// <summary>
        /// Adds a single word and checks that its characters form one chain of left children
        /// below the root.
        /// </summary>
        public void Can_build_one_leg()
        {
            var tree = new LcrsTrie('\0', false);

            tree.Add("baby");

            // One node per character, each the left child of the previous one.
            Assert.AreEqual('b', tree.LeftChild.Value);
            Assert.AreEqual('a', tree.LeftChild.LeftChild.Value);
            Assert.AreEqual('b', tree.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', tree.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            Assert.IsTrue(tree.IsWord("baby").Any());
        }
Example #17
0
        /// <summary>
        /// Builds a new in-memory trie by adding the value of every word returned by <c>Words()</c>.
        /// </summary>
        /// <returns>The root of the newly built trie.</returns>
        public LcrsTrie ReadWholeFile()
        {
            var trie = new LcrsTrie();

            // TODO: assemble trie node by node
            foreach (var entry in Words())
            {
                trie.Add(entry.Value);
            }

            return trie;
        }
Example #18
0
        /// <summary>
        /// Merges a second trie into a first one and checks that the first then contains
        /// every word from both tries.
        /// </summary>
        public void Can_merge_tries()
        {
            var target = new LcrsTrie('\0', false);

            foreach (var term in new[] { "ape", "app", "bananas" })
            {
                target.Add(term);
            }

            var addition = new LcrsTrie('\0', false);

            foreach (var term in new[] { "apple", "banana", "citron" })
            {
                addition.Add(term);
            }

            target.Merge(addition);

            // Words from both sides must be present after the merge.
            foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas", "citron" })
            {
                Assert.IsTrue(target.IsWord(term).Any());
            }
        }
Example #19
0
        /// <summary>
        /// Collects words through <c>DepthFirst</c>, adds each word's value to a new in-memory trie
        /// and returns the left child of that trie's root.
        /// </summary>
        /// <returns>The <c>LeftChild</c> of the newly built root node.</returns>
        public LcrsTrie ReadWholeFile()
        {
            var collected = new List<Word>();

            // DepthFirst fills the list passed as its third argument.
            DepthFirst(string.Empty, new List<char>(), collected, -1);

            var trie = new LcrsTrie();

            foreach (var entry in collected)
            {
                trie.Add(entry.Value);
            }

            return trie.LeftChild;
        }
Example #20
0
        /// <summary>
        /// Collects words through <c>DepthFirst</c> and adds each word's value to a new in-memory trie.
        /// </summary>
        /// <returns>The root of the newly built trie.</returns>
        public LcrsTrie ReadWholeFile()
        {
            var collected = new List<Word>();

            // DepthFirst fills the list passed as its third argument.
            DepthFirst(string.Empty, new List<char>(), collected, -1);

            var trie = new LcrsTrie();

            // TODO: assemble trie node by node
            foreach (var entry in collected)
            {
                trie.Add(entry.Value);
            }

            return trie;
        }
Example #21
0
        /// <summary>
        /// Serializes a trie to disk and checks that <c>MappedTrieReader.IsWord</c> returns
        /// a non-null result for a selection of the stored words.
        /// </summary>
        public void Can_find_exact()
        {
            var triePath = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_exact.tri");
            var trie = new LcrsTrie('\0', false);

            foreach (var term in new[] { "xor", "xxx", "donkey", "xavier", "baby", "dad", "daddy" })
            {
                trie.Add(term);
            }

            // Give every end-of-word node a postings address before serializing.
            foreach (var endNode in trie.EndOfWordNodes())
            {
                endNode.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            trie.Serialize(triePath);

            // A fresh reader is opened for every lookup; some words are looked up more than once.
            foreach (var term in new[] { "xxx", "xxx", "baby", "xxx", "baby", "dad", "daddy" })
            {
                using (var reader = new MappedTrieReader(triePath))
                {
                    Assert.IsTrue(reader.IsWord(term) != null);
                }
            }
        }
Example #22
0
        /// <summary>
        /// Serializes a trie of zero-padded numeric strings and checks that
        /// <c>MappedTrieReader.Range("0000333", "0100006")</c> returns the five expected values in order,
        /// with both bounds included in the result.
        /// </summary>
        public void Can_find_within_numeric_range()
        {
            var triePath = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_within_numeric_range.tri");
            var trie = new LcrsTrie();

            // Added out of order on purpose of the fixture; the result below is expected sorted.
            foreach (var term in new[] { "0000123", "0000333", "0012345", "0100006", "1000989", "0077777", "0000666" })
            {
                trie.Add(term);
            }

            foreach (var endNode in trie.EndOfWordNodes())
            {
                endNode.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            // Dump a visualization of the trie next to the test output.
            File.WriteAllText("Can_find_within_numeric_range.log", trie.Visualize(), Encoding.UTF8);

            trie.Serialize(triePath);

            IList<Word> inRange;

            using (var reader = new MappedTrieReader(triePath))
            {
                // Materialize the result while the reader is still open.
                inRange = reader.Range("0000333", "0100006").ToList();
            }

            Assert.AreEqual(5, inRange.Count);
            Assert.AreEqual("0000333", inRange[0].Value);
            Assert.AreEqual("0000666", inRange[1].Value);
            Assert.AreEqual("0012345", inRange[2].Value);
            Assert.AreEqual("0077777", inRange[3].Value);
            Assert.AreEqual("0100006", inRange[4].Value);
        }
Example #23
0
        /// <summary>
        /// Serializes a trie to a file, reads it back through <c>Serializer.DeserializeTrie</c>
        /// and checks that every word is found both before and after the round trip.
        /// </summary>
        public void Can_deserialize_whole_file()
        {
            var workDir = CreateDir();
            var triePath = Path.Combine(workDir, "MappedTrieReaderTests.Can_deserialize_whole_file.tri");
            var trie = new LcrsTrie('\0', false);

            var terms = new[]
            {
                "baby", "bad", "badness", "bank", "box", "dad",
                "dance", "flower", "flowers", "globe", "global"
            };

            foreach (var term in terms)
            {
                trie.Add(term);
            }

            // Give every end-of-word node a postings address before serializing.
            foreach (var endNode in trie.EndOfWordNodes())
            {
                endNode.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            // The in-memory trie must know every word before it is written out.
            foreach (var term in terms)
            {
                Assert.IsTrue(trie.IsWord(term).Any());
            }

            trie.Serialize(triePath);
            File.WriteAllText("Can_deserialize_whole_file.log", trie.Visualize(), System.Text.Encoding.UTF8);

            var restored = Serializer.DeserializeTrie(workDir, new FileInfo(triePath).Name);

            // The trie rebuilt from disk must know the same words.
            foreach (var term in terms)
            {
                Assert.IsTrue(restored.IsWord(term).Any());
            }
        }
Example #24
0
        /// <summary>
        /// Serializes a growing trie to the same file several times and checks after each write
        /// that <c>MappedTrieReader.HasWord</c> finds exactly the words added so far.
        /// </summary>
        public void Can_find_exact()
        {
            var triePath = Path.Combine(Dir, "Can_find_exact_mm.tri");
            var trie = new LcrsTrie('\0', false);

            foreach (var term in new[] { "xor", "xxx", "donkey", "xavier" })
            {
                trie.Add(term);
            }

            trie.Serialize(triePath);

            Word match;

            // First snapshot: "xxx" is stored, "baby" and "dad" are not.
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("xxx", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsFalse(mapped.HasWord("baby", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsFalse(mapped.HasWord("dad", out match));
            }

            trie.Add("baby");
            trie.Serialize(triePath);

            // Second snapshot: "baby" has joined, "dad" is still missing.
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("xxx", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("baby", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsFalse(mapped.HasWord("dad", out match));
            }

            trie.Add("dad");
            trie.Add("daddy");
            trie.Serialize(triePath);

            // Third snapshot: every looked-up word is stored.
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("xxx", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("baby", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("dad", out match));
            }
            using (var mapped = new MappedTrieReader(triePath))
            {
                Assert.IsTrue(mapped.HasWord("daddy", out match));
            }
        }
Example #25
0
        /// <summary>
        /// Serializes a growing trie to the same file several times and checks after each write
        /// which words <c>MappedTrieReader.Near</c> returns for different second-argument values.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the second argument looks like a tolerance (larger values admit more
        /// words in the assertions below) - confirm against the Near implementation.
        /// </remarks>
        public void Can_find_near()
        {
            var triePath = Path.Combine(Dir, "Can_find_near.tri");
            var trie = new LcrsTrie('\0', false);

            // An empty trie is serialized first; it must yield no matches.
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 1).Select(word => word.Value).ToList();

                Assert.AreEqual(0, matches.Count);
            }

            trie.Add("bad");
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 1).Select(word => word.Value).ToList();

                Assert.AreEqual(1, matches.Count);
                Assert.IsTrue(matches.Contains("bad"));
            }

            // "baby" is not expected to show up for ("ba", 1).
            trie.Add("baby");
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 1).Select(word => word.Value).ToList();

                Assert.AreEqual(1, matches.Count);
                Assert.IsTrue(matches.Contains("bad"));
            }

            trie.Add("b");
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 1).Select(word => word.Value).ToList();

                Assert.AreEqual(2, matches.Count);
                Assert.IsTrue(matches.Contains("bad"));
                Assert.IsTrue(matches.Contains("b"));
            }

            // Raising the second argument to 2 brings "baby" in as well.
            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 2).Select(word => word.Value).ToList();

                Assert.AreEqual(3, matches.Count);
                Assert.IsTrue(matches.Contains("b"));
                Assert.IsTrue(matches.Contains("bad"));
                Assert.IsTrue(matches.Contains("baby"));
            }

            // With 0 nothing is returned; "ba" itself was never added.
            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 0).Select(word => word.Value).ToList();

                Assert.AreEqual(0, matches.Count);
            }

            trie.Add("bananas");
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("ba", 6).Select(word => word.Value).ToList();

                Assert.AreEqual(4, matches.Count);
                Assert.IsTrue(matches.Contains("b"));
                Assert.IsTrue(matches.Contains("bad"));
                Assert.IsTrue(matches.Contains("baby"));
                Assert.IsTrue(matches.Contains("bananas"));
            }

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("bazy", 1).Select(word => word.Value).ToList();

                Assert.AreEqual(1, matches.Count);
                Assert.IsTrue(matches.Contains("baby"));
            }

            trie.Add("bank");
            trie.Serialize(triePath);

            using (var mapped = new MappedTrieReader(triePath))
            {
                var matches = mapped.Near("bazy", 3).Select(word => word.Value).ToList();

                Assert.AreEqual(4, matches.Count);
                Assert.IsTrue(matches.Contains("baby"));
                Assert.IsTrue(matches.Contains("bank"));
                Assert.IsTrue(matches.Contains("bad"));
                Assert.IsTrue(matches.Contains("b"));
            }
        }
Example #26
0
        /// <summary>
        /// Serializes a trie to disk and checks that <c>MappedTrieReader.StartsWith("ra")</c>
        /// returns exactly "rambo", "raiders" and "rain".
        /// </summary>
        public void Can_find_prefixed()
        {
            var fileName = Path.Combine(Dir, "Can_find_prefixed.tri");

            var tree = new LcrsTrie('\0', false);

            // "rambo" and "2" are each added twice; the expected count below still lists "rambo" once.
            tree.Add("rambo");
            tree.Add("rambo");

            tree.Add("2");

            tree.Add("rocky");

            tree.Add("2");

            tree.Add("raiders");

            tree.Add("of");
            tree.Add("the");
            tree.Add("lost");
            tree.Add("ark");

            tree.Add("rain");

            tree.Add("man");

            tree.Serialize(fileName);

            // Dispose the reader once the result has been materialized, so it is not left open.
            using (var reader = new MappedTrieReader(fileName))
            {
                var prefixed = reader.StartsWith("ra").Select(w => w.Value).ToList();

                Assert.AreEqual(3, prefixed.Count);
                Assert.IsTrue(prefixed.Contains("rambo"));
                Assert.IsTrue(prefixed.Contains("raiders"));
                Assert.IsTrue(prefixed.Contains("rain"));
            }
        }
Example #27
0
        /// <summary>
        /// Checks that <c>StartsWith("ra")</c> on an in-memory trie returns exactly
        /// "rambo", "raiders" and "rain".
        /// </summary>
        public void Can_find_prefixed()
        {
            var trie = new LcrsTrie('\0', false);

            // "rambo" and "2" each appear twice; the expected count below still lists "rambo" once.
            var terms = new[]
            {
                "rambo", "rambo", "2", "rocky", "2", "raiders",
                "of", "the", "lost", "ark", "rain", "man"
            };

            foreach (var term in terms)
            {
                trie.Add(term);
            }

            var withPrefix = trie.StartsWith("ra").Select(word => word.Value).ToList();

            Assert.That(withPrefix.Count, Is.EqualTo(3));
            Assert.IsTrue(withPrefix.Contains("rambo"));
            Assert.IsTrue(withPrefix.Contains("raiders"));
            Assert.IsTrue(withPrefix.Contains("rain"));
        }
Example #28
0
        /// <summary>
        /// Serializes a trie to disk and checks that <c>MappedTrieReader.StartsWith("ra")</c>
        /// returns exactly "rambo", "raiders" and "rain".
        /// </summary>
        public void Can_find_prefixed()
        {
            var fileName = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_prefixed.tri");

            var tree = new LcrsTrie('\0', false);

            // "rambo" and "2" are each added twice; the expected count below still lists "rambo" once.
            tree.Add("rambo");
            tree.Add("rambo");

            tree.Add("2");

            tree.Add("rocky");

            tree.Add("2");

            tree.Add("raiders");

            tree.Add("of");
            tree.Add("the");
            tree.Add("lost");
            tree.Add("ark");

            tree.Add("rain");

            tree.Add("man");

            // Give every end-of-word node a postings address before serializing.
            foreach (var node in tree.EndOfWordNodes())
            {
                node.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            tree.Serialize(fileName);

            // Dispose the reader once the result has been materialized, so it is not left open.
            using (var reader = new MappedTrieReader(fileName))
            {
                var prefixed = reader.StartsWith("ra").Select(w => w.Value).ToList();

                Assert.AreEqual(3, prefixed.Count);
                Assert.IsTrue(prefixed.Contains("rambo"));
                Assert.IsTrue(prefixed.Contains("raiders"));
                Assert.IsTrue(prefixed.Contains("rain"));
            }
        }
Example #29
0
        /// <summary>
        /// Serializes a sequence of single-word tries to one file and, after each
        /// write, checks what <c>MappedTrieReader.SemanticallyNear</c> returns for
        /// a given word and edit distance.
        /// </summary>
        /// <remarks>
        /// NOTE(review): each step builds a fresh trie holding a single word, yet
        /// the assertions also expect words written in earlier steps. That only
        /// holds if serializing to an existing file keeps the earlier content and
        /// the reader sees all of it. Confirm against LcrsTrie.Serialize.
        /// </remarks>
        public void Can_find_near()
        {
            var fileName = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_near.tri");

            // Step 1: "bad"
            var trie = new LcrsTrie();
            trie.Add("bad");
            WriteNearTestTrie(trie, fileName);

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 1)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Contains("bad"));
            }

            // Step 2: "baby" (two edits away from "ba", so still only "bad" is expected)
            trie = new LcrsTrie();
            trie.Add("baby");
            WriteNearTestTrie(trie, fileName);

            var dump = trie.Visualize();
            File.WriteAllText("Can_find_near.log", dump, System.Text.Encoding.UTF8);

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 1)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Contains("bad"));
            }

            // Step 3: "b"
            trie = new LcrsTrie();
            trie.Add("b");
            WriteNearTestTrie(trie, fileName);

            dump = trie.Visualize();
            File.WriteAllText("Can_find_near.log", dump, System.Text.Encoding.UTF8);

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 1)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(2, hits.Count);
                Assert.IsTrue(hits.Contains("bad"));
                Assert.IsTrue(hits.Contains("b"));
            }

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 2)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(3, hits.Count);
                Assert.IsTrue(hits.Contains("b"));
                Assert.IsTrue(hits.Contains("bad"));
                Assert.IsTrue(hits.Contains("baby"));
            }

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 0)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(0, hits.Count);
            }

            // Step 4: "bananas"
            trie = new LcrsTrie();
            trie.Add("bananas");
            WriteNearTestTrie(trie, fileName);

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("ba", 6)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(4, hits.Count);
                Assert.IsTrue(hits.Contains("b"));
                Assert.IsTrue(hits.Contains("bad"));
                Assert.IsTrue(hits.Contains("baby"));
                Assert.IsTrue(hits.Contains("bananas"));
            }

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("bazy", 1)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(1, hits.Count);
                Assert.IsTrue(hits.Contains("baby"));
            }

            // Step 5: "bank"
            trie = new LcrsTrie();
            trie.Add("bank");
            WriteNearTestTrie(trie, fileName);

            using (var reader = new MappedTrieReader(fileName))
            {
                var hits = reader.SemanticallyNear("bazy", 3)
                    .Select(word => word.Value)
                    .ToList();

                Assert.AreEqual(4, hits.Count);
                Assert.IsTrue(hits.Contains("baby"));
                Assert.IsTrue(hits.Contains("bank"));
                Assert.IsTrue(hits.Contains("bad"));
                Assert.IsTrue(hits.Contains("b"));
            }
        }

        // Gives every end-of-word node of the trie a placeholder postings address
        // and then serializes the trie to the given file.
        private static void WriteNearTestTrie(LcrsTrie trie, string fileName)
        {
            foreach (var endNode in trie.EndOfWordNodes())
            {
                endNode.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            trie.Serialize(fileName);
        }