/// <summary>
/// Verifies that <c>IsWord</c> yields results only for words that have been
/// added so far, and keeps finding earlier words as more are added.
/// </summary>
public void Can_find_exact()
{
    var tree = new LcrsTrie('\0', false);

    // Nothing has been added yet, so no lookup may succeed.
    Assert.IsFalse(tree.IsWord("xxx").Any());

    tree.Add("xxx");

    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsFalse(tree.IsWord("baby").Any());
    Assert.IsFalse(tree.IsWord("dad").Any());

    tree.Add("baby");

    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsTrue(tree.IsWord("baby").Any());
    Assert.IsFalse(tree.IsWord("dad").Any());

    tree.Add("dad");

    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsTrue(tree.IsWord("baby").Any());
    Assert.IsTrue(tree.IsWord("dad").Any());
}
/// <summary>
/// Verifies the node layout after adding "baby" and then "bad": the two words
/// share the "ba" prefix, and the 'd' of "bad" is the right sibling of the
/// second 'b' of "baby".
/// </summary>
public void Can_append()
{
    var root = new LcrsTrie('\0', false);

    root.Add("baby");
    root.Add("bad");

    // Shared prefix: root -> 'b' -> 'a'.
    Assert.AreEqual('b', root.LeftChild.Value);
    Assert.AreEqual('a', root.LeftChild.LeftChild.Value);

    // Third level holds 'b' (from "baby") with 'd' (from "bad") as its right sibling.
    Assert.AreEqual('d', root.LeftChild.LeftChild.LeftChild.RightSibling.Value);
    Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    Assert.IsTrue(root.IsWord("baby").Any());
    Assert.IsTrue(root.IsWord("bad").Any());
}
/// <summary>
/// Flags as obsolete every stored document hash that matches one of the keys
/// in <c>_pks</c>, for each index in <c>_ixs</c>.
/// </summary>
/// <remarks>
/// For each index the document hashes are re-serialized into a temporary
/// ".pk.tmp" file. Only when at least one document was newly flagged are the
/// index's <c>DocumentCount</c> reduced and the ".ix" and ".pk" files
/// overwritten from their temporary counterparts. The temporary files are
/// removed afterwards in either case.
/// </remarks>
public void Commit()
{
    // Hash every key scheduled for deletion and store the hash strings in a trie for lookup.
    var deleteSet = new LcrsTrie();

    foreach (var value in _pks)
    {
        var hashString = value.ToHash().ToString(CultureInfo.InvariantCulture);

        deleteSet.Add(hashString);
    }

    foreach (var ix in _ixs)
    {
        var docHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk"));
        var tmpDocHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk.tmp"));
        var tmpIxFileName = Path.Combine(_directory, ix.VersionId + ".ix.tmp");
        var ixFileName = Path.Combine(_directory, ix.VersionId + ".ix");

        var deleted = 0;

        using (var stream = new FileStream(tmpDocHashFileName, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            foreach (var document in Serializer.DeserializeDocHashes(docHashFileName))
            {
                var hash = document.Hash.ToString(CultureInfo.InvariantCulture);

                // Count a document only the first time it is flagged, so that
                // already-obsolete entries do not reduce DocumentCount again.
                if (deleteSet.IsWord(hash).Any() && !document.IsObsolete)
                {
                    document.IsObsolete = true;
                    deleted++;
                }

                // Every document is written back, whether or not it was flagged.
                document.Serialize(stream);
            }
        }

        if (deleted > 0)
        {
            ix.DocumentCount -= deleted;
            ix.Serialize(tmpIxFileName);

            File.Copy(tmpIxFileName, ixFileName, overwrite: true);
            File.Copy(tmpDocHashFileName, docHashFileName, overwrite: true);

            File.Delete(tmpIxFileName);
        }

        // The temporary hash file is created unconditionally above, so it must
        // be removed unconditionally; otherwise it is left behind whenever
        // nothing was deleted for this index.
        File.Delete(tmpDocHashFileName);
    }
}
/// <summary>
/// Verifies the node layout after adding "baby" and "dad", two words with no
/// common prefix: 'b' is the root's left child and 'd' is the right sibling
/// of 'b', each followed by the remaining letters of its word via LeftChild.
/// </summary>
public void Can_build_two_legs()
{
    var root = new LcrsTrie('\0', false);

    root.Add("baby");
    root.Add("dad");

    Assert.AreEqual('d', root.LeftChild.RightSibling.Value);
    Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
    Assert.AreEqual('d', root.LeftChild.RightSibling.LeftChild.LeftChild.Value);
    Assert.AreEqual('b', root.LeftChild.Value);
    Assert.AreEqual('a', root.LeftChild.RightSibling.LeftChild.Value);
    Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    // IsWord returns a sequence of matches, so test for a match with Any();
    // a null comparison on the sequence would not detect a missing word.
    Assert.IsTrue(root.IsWord("baby").Any());
    Assert.IsTrue(root.IsWord("dad").Any());
}
/// <summary>
/// Verifies that adding the single word "baby" produces a chain of nodes
/// linked through LeftChild, one per character, and that the word is found.
/// </summary>
public void Can_build_one_leg()
{
    var tree = new LcrsTrie('\0', false);

    tree.Add("baby");

    Assert.AreEqual('b', tree.LeftChild.Value);
    Assert.AreEqual('a', tree.LeftChild.LeftChild.Value);
    Assert.AreEqual('b', tree.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', tree.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    Assert.IsTrue(tree.IsWord("baby").Any());
}
/// <summary>
/// Verifies that after merging one trie into another, the receiving trie
/// finds the words originally added to either of them.
/// </summary>
public void Can_merge_tries()
{
    var target = new LcrsTrie('\0', false);
    foreach (var entry in new[] { "ape", "app", "bananas" })
    {
        target.Add(entry);
    }

    var source = new LcrsTrie('\0', false);
    foreach (var entry in new[] { "apple", "banana", "citron" })
    {
        source.Add(entry);
    }

    target.Merge(source);

    // Words from both tries must be present in the merge target.
    var expectedWords = new[] { "ape", "app", "apple", "banana", "bananas", "citron" };
    foreach (var expected in expectedWords)
    {
        Assert.IsTrue(target.IsWord(expected).Any());
    }
}
/// <summary>
/// Builds a trie, serializes it to a file, reads it back with
/// <c>Serializer.DeserializeTrie</c> and verifies that every word found in
/// the original trie is also found in the recreated one.
/// </summary>
public void Can_deserialize_whole_file()
{
    var dir = CreateDir();
    var fileName = Path.Combine(dir, "MappedTrieReaderTests.Can_deserialize_whole_file.tri");

    // Insertion order is kept exactly as listed.
    var words = new[]
    {
        "baby", "bad", "badness", "bank", "box", "dad",
        "dance", "flower", "flowers", "globe", "global"
    };

    var tree = new LcrsTrie('\0', false);
    foreach (var entry in words)
    {
        tree.Add(entry);
    }

    // Give every end-of-word node a postings address before serializing.
    foreach (var node in tree.EndOfWordNodes())
    {
        node.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    // Sanity check on the in-memory trie.
    foreach (var entry in words)
    {
        Assert.IsTrue(tree.IsWord(entry).Any());
    }

    tree.Serialize(fileName);

    // Dump a visualization of the trie next to the test run for inspection.
    File.WriteAllText("Can_deserialize_whole_file.log", tree.Visualize(), System.Text.Encoding.UTF8);

    var recreated = Serializer.DeserializeTrie(dir, new FileInfo(fileName).Name);

    foreach (var entry in words)
    {
        Assert.IsTrue(recreated.IsWord(entry).Any());
    }
}