Beispiel #1
0
        /// <summary>
        /// Verifies that <c>IsWord</c> yields results only for words that have actually been
        /// added, re-checking all three probe words after each successive <c>Add</c>.
        /// </summary>
        public void Can_find_exact()
        {
            var tree = new LcrsTrie('\0', false);

            // Nothing has been added yet, so the lookup must come back empty.
            Assert.IsFalse(tree.IsWord("xxx").Any());

            tree.Add("xxx");

            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsFalse(tree.IsWord("baby").Any());
            Assert.IsFalse(tree.IsWord("dad").Any());

            tree.Add("baby");

            // Adding a second word must not hide the first one.
            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsTrue(tree.IsWord("baby").Any());
            Assert.IsFalse(tree.IsWord("dad").Any());

            tree.Add("dad");

            Assert.IsTrue(tree.IsWord("xxx").Any());
            Assert.IsTrue(tree.IsWord("baby").Any());
            Assert.IsTrue(tree.IsWord("dad").Any());
        }
Beispiel #2
0
        /// <summary>
        /// Verifies the node layout after adding two words that share the prefix "ba":
        /// the third level holds 'b' with 'd' as its right sibling, and 'y' hangs below 'b'.
        /// </summary>
        public void Can_append()
        {
            var root = new LcrsTrie('\0', false);

            root.Add("baby");
            root.Add("bad");

            // Shared prefix "ba", then 'd' is reached as the right sibling of the second 'b'.
            Assert.AreEqual('b', root.LeftChild.Value);
            Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
            Assert.AreEqual('d', root.LeftChild.LeftChild.LeftChild.RightSibling.Value);

            // The remainder of "baby" continues down the left-child chain.
            Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            Assert.IsTrue(root.IsWord("baby").Any());
            Assert.IsTrue(root.IsWord("bad").Any());
        }
        /// <summary>
        /// Marks documents as obsolete in every index listed in <c>_ixs</c>.
        /// </summary>
        /// <remarks>
        /// The hashes of all values in <c>_pks</c> are collected into a trie. For each index the
        /// "{VersionId}.pk" file is streamed into "{VersionId}.pk.tmp", flagging every document
        /// whose hash is in that trie. Only when at least one document was newly flagged is the
        /// index's <c>DocumentCount</c> reduced and both the ".ix" and ".pk" files overwritten
        /// from their temporary copies.
        /// </remarks>
        public void Commit()
        {
            var deleteSet = new LcrsTrie();

            foreach (var value in _pks)
            {
                deleteSet.Add(value.ToHash().ToString(CultureInfo.InvariantCulture));
            }

            foreach (var ix in _ixs)
            {
                var docHashFileName    = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk"));
                var tmpDocHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk.tmp"));

                var tmpIxFileName = Path.Combine(_directory, ix.VersionId + ".ix.tmp");
                var ixFileName    = Path.Combine(_directory, ix.VersionId + ".ix");

                var deleted = 0;

                using (var stream = new FileStream(tmpDocHashFileName, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    foreach (var document in Serializer.DeserializeDocHashes(docHashFileName))
                    {
                        var hash = document.Hash.ToString(CultureInfo.InvariantCulture);

                        // Count only documents that flip to obsolete now; ones that were
                        // already obsolete have been subtracted from DocumentCount before.
                        if (deleteSet.IsWord(hash).Any() && !document.IsObsolete)
                        {
                            document.IsObsolete = true;
                            deleted++;
                        }

                        // Every document is written back, flagged or not.
                        document.Serialize(stream);
                    }
                }

                if (deleted > 0)
                {
                    ix.DocumentCount -= deleted;
                    ix.Serialize(tmpIxFileName);

                    File.Copy(tmpIxFileName, ixFileName, overwrite: true);
                    File.Copy(tmpDocHashFileName, docHashFileName, overwrite: true);

                    File.Delete(tmpIxFileName);
                }

                // The temporary hash file is created on every pass, so it has to be removed on
                // every pass - previously it was left behind whenever nothing was deleted.
                File.Delete(tmpDocHashFileName);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Verifies the node layout after adding two words with no common prefix: "baby" runs
        /// down the left-child chain from the root, and "dad" starts at the right sibling of
        /// the first 'b'.
        /// </summary>
        public void Can_build_two_legs()
        {
            var root = new LcrsTrie('\0', false);

            root.Add("baby");
            root.Add("dad");

            Assert.AreEqual('d', root.LeftChild.RightSibling.Value);
            Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
            Assert.AreEqual('d', root.LeftChild.RightSibling.LeftChild.LeftChild.Value);

            Assert.AreEqual('b', root.LeftChild.Value);
            Assert.AreEqual('a', root.LeftChild.RightSibling.LeftChild.Value);
            Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            // A "!= null" check passes even for an empty result; Any() actually proves
            // that the word was found.
            Assert.IsTrue(root.IsWord("baby").Any());
            Assert.IsTrue(root.IsWord("dad").Any());
        }
Beispiel #5
0
        /// <summary>
        /// Verifies that adding a single word produces one chain of left children, one node
        /// per character, and that the word can then be looked up.
        /// </summary>
        public void Can_build_one_leg()
        {
            var tree = new LcrsTrie('\0', false);

            tree.Add("baby");

            Assert.AreEqual('b', tree.LeftChild.Value);
            Assert.AreEqual('a', tree.LeftChild.LeftChild.Value);
            Assert.AreEqual('b', tree.LeftChild.LeftChild.LeftChild.Value);
            Assert.AreEqual('y', tree.LeftChild.LeftChild.LeftChild.LeftChild.Value);

            Assert.IsTrue(tree.IsWord("baby").Any());
        }
Beispiel #6
0
        /// <summary>
        /// Verifies that after merging one trie into another, the receiving trie finds the
        /// words of both.
        /// </summary>
        public void Can_merge_tries()
        {
            var target = new LcrsTrie('\0', false);

            foreach (var entry in new[] { "ape", "app", "bananas" })
            {
                target.Add(entry);
            }

            var source = new LcrsTrie('\0', false);

            foreach (var entry in new[] { "apple", "banana", "citron" })
            {
                source.Add(entry);
            }

            target.Merge(source);

            // Words from both tries, including ones that are prefixes of each other.
            string[] expectedWords = { "ape", "app", "apple", "banana", "bananas", "citron" };

            foreach (var expected in expectedWords)
            {
                Assert.IsTrue(target.IsWord(expected).Any());
            }
        }
Beispiel #7
0
        /// <summary>
        /// Round-trips a trie through <c>Serialize</c> and <c>Serializer.DeserializeTrie</c> and
        /// verifies that every word found before serialization is also found afterwards.
        /// </summary>
        public void Can_deserialize_whole_file()
        {
            string[] words =
            {
                "baby", "bad", "badness", "bank", "box", "dad",
                "dance", "flower", "flowers", "globe", "global"
            };

            var dir = CreateDir();
            var fileName = Path.Combine(dir, "MappedTrieReaderTests.Can_deserialize_whole_file.tri");
            var tree = new LcrsTrie('\0', false);

            foreach (var entry in words)
            {
                tree.Add(entry);
            }

            // Give every end-of-word node a postings address before the trie is serialized.
            foreach (var node in tree.EndOfWordNodes())
            {
                node.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
            }

            // Sanity check on the in-memory trie before it is written out.
            foreach (var entry in words)
            {
                Assert.IsTrue(tree.IsWord(entry).Any());
            }

            tree.Serialize(fileName);

            // Dump the tree's Visualize() output to a log file (relative path).
            var visualization = tree.Visualize();
            File.WriteAllText("Can_deserialize_whole_file.log", visualization, System.Text.Encoding.UTF8);

            var serializedName = new FileInfo(fileName).Name;
            var recreated = Serializer.DeserializeTrie(dir, serializedName);

            foreach (var entry in words)
            {
                Assert.IsTrue(recreated.IsWord(entry).Any());
            }
        }