/// <summary>
/// Checks that <c>HasWord</c> reports a word only after it has been added,
/// and that previously added words remain findable as more are inserted.
/// </summary>
public void Can_find_exact()
{
    var trie = new LcrsTrie('\0', false);
    Word match;

    // Nothing has been added yet.
    Assert.False(trie.HasWord("xxx", out match));

    trie.Add("xxx");
    Assert.True(trie.HasWord("xxx", out match));
    foreach (var missing in new[] { "baby", "dad" })
    {
        Assert.False(trie.HasWord(missing, out match));
    }

    trie.Add("baby");
    foreach (var present in new[] { "xxx", "baby" })
    {
        Assert.True(trie.HasWord(present, out match));
    }
    Assert.False(trie.HasWord("dad", out match));

    trie.Add("dad");
    foreach (var present in new[] { "xxx", "baby", "dad" })
    {
        Assert.True(trie.HasWord(present, out match));
    }
}
/// <summary>
/// Exercises <c>SemanticallyNear</c> on an in-memory trie, checking which words
/// are returned for the queries "ba" and "bazy" at several distance arguments
/// while the trie is being grown.
/// </summary>
public void Can_find_near()
{
    var trie = new LcrsTrie('\0', false);

    // Empty trie yields no matches.
    var hits = trie.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
    Assert.IsFalse(hits.Any());

    trie.Add("bad");
    hits = trie.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
    Assert.AreEqual(1, hits.Count);
    Assert.IsTrue(hits.Contains("bad"));

    // "baby" is not returned for "ba" with the argument 1.
    trie.Add("baby");
    hits = trie.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
    Assert.AreEqual(1, hits.Count);
    Assert.IsTrue(hits.Contains("bad"));

    trie.Add("b");
    hits = trie.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
    Assert.AreEqual(2, hits.Count);
    foreach (var expected in new[] { "bad", "b" })
    {
        Assert.IsTrue(hits.Contains(expected));
    }

    hits = trie.SemanticallyNear("ba", 2).Select(w => w.Value).ToList();
    Assert.AreEqual(3, hits.Count);
    foreach (var expected in new[] { "b", "bad", "baby" })
    {
        Assert.IsTrue(hits.Contains(expected));
    }

    // With the argument 0 nothing is returned ("ba" itself was never added).
    hits = trie.SemanticallyNear("ba", 0).Select(w => w.Value).ToList();
    Assert.AreEqual(0, hits.Count);

    trie.Add("bananas");
    hits = trie.SemanticallyNear("ba", 6).Select(w => w.Value).ToList();
    Assert.AreEqual(4, hits.Count);
    foreach (var expected in new[] { "b", "bad", "baby", "bananas" })
    {
        Assert.IsTrue(hits.Contains(expected));
    }

    hits = trie.SemanticallyNear("bazy", 1).Select(w => w.Value).ToList();
    Assert.AreEqual(1, hits.Count);
    Assert.IsTrue(hits.Contains("baby"));

    trie.Add("bank");
    hits = trie.SemanticallyNear("bazy", 3).Select(w => w.Value).ToList();
    Assert.AreEqual(4, hits.Count);
    foreach (var expected in new[] { "baby", "bank", "bad", "b" })
    {
        Assert.IsTrue(hits.Contains(expected));
    }
}
/// <summary>
/// Exercises <c>Near</c> on an in-memory trie, checking which words are returned
/// for the queries "ba" and "bazy" at several distance arguments while the trie
/// is being grown.
/// </summary>
public void Can_find_near()
{
    var tree = new LcrsTrie('\0', false);

    // Empty trie yields no matches.
    var near = tree.Near("ba", 1).Select(w => w.Value).ToList();
    Assert.That(near, Is.Empty);

    tree.Add("bad");
    near = tree.Near("ba", 1).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(1));
    Assert.IsTrue(near.Contains("bad"));

    // "baby" is not returned for "ba" with the argument 1.
    tree.Add("baby");
    near = tree.Near("ba", 1).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(1));
    Assert.IsTrue(near.Contains("bad"));

    tree.Add("b");
    near = tree.Near("ba", 1).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(2));
    Assert.IsTrue(near.Contains("bad"));
    Assert.IsTrue(near.Contains("b"));

    near = tree.Near("ba", 2).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(3));
    Assert.IsTrue(near.Contains("b"));
    Assert.IsTrue(near.Contains("bad"));
    Assert.IsTrue(near.Contains("baby"));

    // With the argument 0 nothing is returned ("ba" itself was never added).
    near = tree.Near("ba", 0).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(0));

    tree.Add("bananas");
    near = tree.Near("ba", 6).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(4));
    Assert.IsTrue(near.Contains("b"));
    Assert.IsTrue(near.Contains("bad"));
    Assert.IsTrue(near.Contains("baby"));
    Assert.IsTrue(near.Contains("bananas"));

    near = tree.Near("bazy", 1).Select(w => w.Value).ToList();
    Assert.That(near.Count, Is.EqualTo(1));
    Assert.IsTrue(near.Contains("baby"));

    tree.Add("bank");
    near = tree.Near("bazy", 3).Select(w => w.Value).ToList();
    // Uses the same constraint-style count assertion as the rest of this method.
    Assert.That(near.Count, Is.EqualTo(4));
    Assert.IsTrue(near.Contains("baby"));
    Assert.IsTrue(near.Contains("bank"));
    Assert.IsTrue(near.Contains("bad"));
    Assert.IsTrue(near.Contains("b"));
}
/// <summary>
/// Checks that <c>IsWord</c> yields a result only for words that have been added,
/// and that previously added words remain findable as more are inserted.
/// </summary>
public void Can_find_exact()
{
    var tree = new LcrsTrie('\0', false);

    // Nothing has been added yet.
    Assert.IsFalse(tree.IsWord("xxx").Any());

    tree.Add("xxx");
    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsFalse(tree.IsWord("baby").Any());
    Assert.IsFalse(tree.IsWord("dad").Any());

    tree.Add("baby");
    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsTrue(tree.IsWord("baby").Any());
    Assert.IsFalse(tree.IsWord("dad").Any());

    tree.Add("dad");
    Assert.IsTrue(tree.IsWord("xxx").Any());
    Assert.IsTrue(tree.IsWord("baby").Any());
    Assert.IsTrue(tree.IsWord("dad").Any());
}
/// <summary>
/// Checks the value reported by <c>Weight</c> after each of two batches of insertions.
/// </summary>
public void Can_get_weight()
{
    var trie = new LcrsTrie('\0', false);

    foreach (var term in new[] { "pap", "papp", "papaya" })
    {
        trie.Add(term);
    }
    // NOTE(review): 8 matches 7 distinct character nodes plus the root — confirm that is what Weight counts.
    Assert.AreEqual(8, trie.Weight);

    foreach (var term in new[] { "ape", "apelsin" })
    {
        trie.Add(term);
    }
    Assert.AreEqual(15, trie.Weight);
}
/// <summary>
/// Adds "baby" then "bad" and checks the resulting left-child/right-sibling
/// layout: b -> a -> d, with 'b' as the right sibling of 'd' and 'y' below it.
/// </summary>
public void Can_append()
{
    var root = new LcrsTrie('\0', false);
    root.Add("baby");
    root.Add("bad");

    // Walk down one node at a time, asserting before descending further.
    var first = root.LeftChild;
    Assert.That(first.Value, Is.EqualTo('b'));

    var second = first.LeftChild;
    Assert.That(second.Value, Is.EqualTo('a'));

    var third = second.LeftChild;
    Assert.That(third.Value, Is.EqualTo('d'));

    var thirdSibling = third.RightSibling;
    Assert.That(thirdSibling.Value, Is.EqualTo('b'));
    Assert.That(thirdSibling.LeftChild.Value, Is.EqualTo('y'));

    foreach (var term in new[] { "baby", "bad" })
    {
        Assert.True(root.HasWord(term));
    }
}
/// <summary>
/// Serializes a trie to disk and checks that <c>MappedTrieReader.Range("azz", "xerox")</c>
/// returns "banana", "bananas" and "xanax", in that order.
/// </summary>
public void Can_find_within_range()
{
    var path = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_within_range.tri");
    var trie = new LcrsTrie();

    foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas", "xanax", "xxx" })
    {
        trie.Add(term);
    }

    // Give every end-of-word node a placeholder postings address before serializing.
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    // Dump a visualization of the trie next to the test run for debugging.
    File.WriteAllText("Can_find_within_range.log", trie.Visualize(), Encoding.UTF8);
    trie.Serialize(path);

    IList<Word> inRange;
    using (var reader = new MappedTrieReader(path))
    {
        inRange = reader.Range("azz", "xerox").ToList();
    }

    var expected = new[] { "banana", "bananas", "xanax" };
    Assert.AreEqual(expected.Length, inRange.Count);
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], inRange[i].Value);
    }
}
/// <summary>
/// Checks that <c>GreaterThan("ape")</c> returns the four remaining words
/// in the order "app", "apple", "banana", "bananas".
/// </summary>
public void Can_find_greater_than()
{
    var trie = new LcrsTrie();
    foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas" })
    {
        trie.Add(term);
    }

    var greater = trie.GreaterThan("ape").ToList();

    var expected = new[] { "app", "apple", "banana", "bananas" };
    Assert.AreEqual(expected.Length, greater.Count);
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], greater[i]);
    }
}
/// <summary>
/// Adds "baby" then "bad" and checks the resulting left-child/right-sibling
/// layout: b -> a -> b -> y, with 'd' as the right sibling of the second 'b'.
/// </summary>
public void Can_append()
{
    var root = new LcrsTrie('\0', false);
    root.Add("baby");
    root.Add("bad");

    Assert.AreEqual('b', root.LeftChild.Value);
    Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
    // 'd' hangs off the third-level node as a right sibling.
    Assert.AreEqual('d', root.LeftChild.LeftChild.LeftChild.RightSibling.Value);
    Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    Assert.IsTrue(root.IsWord("baby").Any());
    Assert.IsTrue(root.IsWord("bad").Any());
}
/// <summary>
/// Flags as obsolete every stored document hash that matches one of the pending
/// primary keys, and rewrites the affected index and document-hash files.
/// </summary>
/// <remarks>
/// For each index, the document hashes are streamed into a temporary ".pk.tmp"
/// file. Only when at least one document was newly flagged are the index's
/// document count decremented and the ".ix" / ".pk" files overwritten. The
/// temporary files are removed in both cases.
/// </remarks>
public void Commit()
{
    // Collect the hashes of all keys scheduled for deletion in a trie for lookup.
    var deleteSet = new LcrsTrie();
    foreach (var value in _pks)
    {
        var hashString = value.ToHash().ToString(CultureInfo.InvariantCulture);
        deleteSet.Add(hashString);
    }

    foreach (var ix in _ixs)
    {
        var docHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk"));
        var tmpDocHashFileName = Path.Combine(_directory, string.Format("{0}.{1}", ix.VersionId, "pk.tmp"));
        var tmpIxFileName = Path.Combine(_directory, ix.VersionId + ".ix.tmp");
        var ixFileName = Path.Combine(_directory, ix.VersionId + ".ix");
        var deleted = 0;

        using (var stream = new FileStream(tmpDocHashFileName, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            foreach (var document in Serializer.DeserializeDocHashes(docHashFileName))
            {
                var hash = document.Hash.ToString(CultureInfo.InvariantCulture);

                // Only count documents that were not already obsolete.
                if (!document.IsObsolete && deleteSet.IsWord(hash).Any())
                {
                    document.IsObsolete = true;
                    deleted++;
                }

                // Every document is written back, flagged or not.
                document.Serialize(stream);
            }
        }

        if (deleted > 0)
        {
            ix.DocumentCount -= deleted;
            ix.Serialize(tmpIxFileName);

            File.Copy(tmpIxFileName, ixFileName, overwrite: true);
            File.Copy(tmpDocHashFileName, docHashFileName, overwrite: true);
            File.Delete(tmpIxFileName);
        }

        // The temporary hash file is created unconditionally above, so remove it
        // unconditionally; previously it was left behind when nothing was deleted.
        File.Delete(tmpDocHashFileName);
    }
}
/// <summary>
/// Adds "baby" and "dad" and checks that they form two separate legs under the
/// root: 'b' as the root's left child and 'd' as that node's right sibling.
/// </summary>
public void Can_build_two_legs()
{
    var root = new LcrsTrie('\0', false);
    root.Add("baby");
    root.Add("dad");
    Word match;

    var babyLeg = root.LeftChild;
    var dadLeg = babyLeg.RightSibling;

    Assert.That(dadLeg.Value, Is.EqualTo('d'));
    Assert.That(babyLeg.LeftChild.Value, Is.EqualTo('a'));
    Assert.That(dadLeg.LeftChild.LeftChild.Value, Is.EqualTo('d'));
    Assert.That(babyLeg.Value, Is.EqualTo('b'));
    Assert.That(dadLeg.LeftChild.Value, Is.EqualTo('a'));
    Assert.That(babyLeg.LeftChild.LeftChild.Value, Is.EqualTo('b'));
    Assert.That(babyLeg.LeftChild.LeftChild.LeftChild.Value, Is.EqualTo('y'));

    foreach (var term in new[] { "baby", "dad" })
    {
        Assert.True(root.HasWord(term, out match));
    }
}
/// <summary>
/// Adds "baby" and "dad" and checks that they form two separate legs under the
/// root: 'b' as the root's left child and 'd' as that node's right sibling.
/// </summary>
public void Can_build_two_legs()
{
    var root = new LcrsTrie('\0', false);
    root.Add("baby");
    root.Add("dad");

    Assert.AreEqual('d', root.LeftChild.RightSibling.Value);
    Assert.AreEqual('a', root.LeftChild.LeftChild.Value);
    Assert.AreEqual('d', root.LeftChild.RightSibling.LeftChild.LeftChild.Value);
    Assert.AreEqual('b', root.LeftChild.Value);
    Assert.AreEqual('a', root.LeftChild.RightSibling.LeftChild.Value);
    Assert.AreEqual('b', root.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', root.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    // NOTE(review): if IsWord returns a (possibly empty) sequence, "!= null" cannot
    // fail for a missing word — confirm the return type and consider ".Any()".
    Assert.IsTrue(root.IsWord("baby") != null);
    Assert.IsTrue(root.IsWord("dad") != null);
}
/// <summary>
/// Serializes a trie to disk, reads it back with <c>Serializer.DeserializeTrie</c>
/// and checks that every word is found both before and after the round trip.
/// </summary>
public void Can_deserialize_whole_file()
{
    var path = Path.Combine(Dir, "Can_serialize_whole_file.tri");
    var terms = new[] { "baby", "bad", "bank", "box", "dad", "dance" };

    var original = new LcrsTrie('\0', false);
    foreach (var term in terms)
    {
        original.Add(term);
    }

    Word match;
    foreach (var term in terms)
    {
        Assert.IsTrue(original.HasWord(term, out match));
    }

    original.Serialize(path);

    // DeserializeTrie takes the directory and the bare file name separately.
    var restored = Serializer.DeserializeTrie(Dir, new FileInfo(path).Name);
    foreach (var term in terms)
    {
        Assert.IsTrue(restored.HasWord(term, out match));
    }
}
/// <summary>
/// Merges one trie into another and checks that the target then contains the
/// words of both, including the word present in each ("banana").
/// </summary>
public void Can_append_tries()
{
    var target = new LcrsTrie('\0', false);
    foreach (var term in new[] { "ape", "app", "banana" })
    {
        target.Add(term);
    }

    var other = new LcrsTrie('\0', false);
    foreach (var term in new[] { "apple", "banana" })
    {
        other.Add(term);
    }

    target.Merge(other);

    Word match;
    foreach (var term in new[] { "ape", "app", "apple", "banana" })
    {
        Assert.IsTrue(target.HasWord(term, out match));
    }
}
/// <summary>
/// Adds the single word "baby" and checks that its characters form one chain
/// of left children below the root.
/// </summary>
public void Can_build_one_leg()
{
    var trie = new LcrsTrie('\0', false);
    trie.Add("baby");

    // Descend one level per character, asserting before each further step.
    var node = trie.LeftChild;
    Assert.That(node.Value, Is.EqualTo('b'));

    node = node.LeftChild;
    Assert.That(node.Value, Is.EqualTo('a'));

    node = node.LeftChild;
    Assert.That(node.Value, Is.EqualTo('b'));

    node = node.LeftChild;
    Assert.That(node.Value, Is.EqualTo('y'));

    Assert.True(trie.HasWord("baby"));
}
/// <summary>
/// Adds the single word "baby" and checks that its characters form one chain
/// of left children below the root.
/// </summary>
public void Can_build_one_leg()
{
    var tree = new LcrsTrie('\0', false);
    tree.Add("baby");

    Assert.AreEqual('b', tree.LeftChild.Value);
    Assert.AreEqual('a', tree.LeftChild.LeftChild.Value);
    Assert.AreEqual('b', tree.LeftChild.LeftChild.LeftChild.Value);
    Assert.AreEqual('y', tree.LeftChild.LeftChild.LeftChild.LeftChild.Value);

    Assert.IsTrue(tree.IsWord("baby").Any());
}
/// <summary>
/// Builds an in-memory trie containing the value of every word returned by <c>Words()</c>.
/// </summary>
/// <returns>The root of the newly built trie.</returns>
public LcrsTrie ReadWholeFile()
{
    var trie = new LcrsTrie();

    // TODO: assemble trie node by node
    foreach (var entry in Words())
    {
        trie.Add(entry.Value);
    }

    return trie;
}
/// <summary>
/// Merges one trie into another and checks that the target then contains every
/// word that was added to either of them.
/// </summary>
public void Can_merge_tries()
{
    var target = new LcrsTrie('\0', false);
    foreach (var term in new[] { "ape", "app", "bananas" })
    {
        target.Add(term);
    }

    var other = new LcrsTrie('\0', false);
    foreach (var term in new[] { "apple", "banana", "citron" })
    {
        other.Add(term);
    }

    target.Merge(other);

    foreach (var term in new[] { "ape", "app", "apple", "banana", "bananas", "citron" })
    {
        Assert.IsTrue(target.IsWord(term).Any());
    }
}
/// <summary>
/// Collects words via <c>DepthFirst</c>, adds their values to a fresh trie and
/// returns that trie's left child.
/// </summary>
/// <returns>
/// The <c>LeftChild</c> of the rebuilt root.
/// NOTE(review): presumably null when no words were collected — confirm against callers.
/// </returns>
public LcrsTrie ReadWholeFile()
{
    var collected = new List<Word>();
    DepthFirst(string.Empty, new List<char>(), collected, -1);

    var trie = new LcrsTrie();
    foreach (var entry in collected)
    {
        trie.Add(entry.Value);
    }

    return trie.LeftChild;
}
/// <summary>
/// Collects words via <c>DepthFirst</c> and adds their values to a fresh trie.
/// </summary>
/// <returns>The root of the newly built trie.</returns>
public LcrsTrie ReadWholeFile()
{
    var collected = new List<Word>();
    DepthFirst(string.Empty, new List<char>(), collected, -1);

    var trie = new LcrsTrie();

    // TODO: assemble trie node by node
    foreach (var entry in collected)
    {
        trie.Add(entry.Value);
    }

    return trie;
}
/// <summary>
/// Serializes a trie to disk and checks, with a fresh <c>MappedTrieReader</c>
/// per lookup, that <c>IsWord</c> returns non-null for words that were added.
/// </summary>
public void Can_find_exact()
{
    var path = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_exact.tri");

    var trie = new LcrsTrie('\0', false);
    foreach (var term in new[] { "xor", "xxx", "donkey", "xavier", "baby", "dad", "daddy" })
    {
        trie.Add(term);
    }

    // Give every end-of-word node a placeholder postings address before serializing.
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    trie.Serialize(path);

    // Same lookups, in the same order, each against its own reader instance.
    var lookups = new[] { "xxx", "xxx", "baby", "xxx", "baby", "dad", "daddy" };
    foreach (var term in lookups)
    {
        using (var reader = new MappedTrieReader(path))
        {
            Assert.IsTrue(reader.IsWord(term) != null);
        }
    }
}
/// <summary>
/// Serializes a trie of zero-padded numeric strings and checks that
/// <c>MappedTrieReader.Range("0000333", "0100006")</c> returns the five values
/// from "0000333" through "0100006" in ascending order.
/// </summary>
public void Can_find_within_numeric_range()
{
    var path = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_within_numeric_range.tri");
    var trie = new LcrsTrie();

    // Inserted out of order on purpose of the original test data.
    var inserted = new[] { "0000123", "0000333", "0012345", "0100006", "1000989", "0077777", "0000666" };
    foreach (var term in inserted)
    {
        trie.Add(term);
    }

    // Give every end-of-word node a placeholder postings address before serializing.
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    // Dump a visualization of the trie next to the test run for debugging.
    File.WriteAllText("Can_find_within_numeric_range.log", trie.Visualize(), Encoding.UTF8);
    trie.Serialize(path);

    IList<Word> inRange;
    using (var reader = new MappedTrieReader(path))
    {
        inRange = reader.Range("0000333", "0100006").ToList();
    }

    var expected = new[] { "0000333", "0000666", "0012345", "0077777", "0100006" };
    Assert.AreEqual(expected.Length, inRange.Count);
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], inRange[i].Value);
    }
}
/// <summary>
/// Serializes a trie to disk, reads it back with <c>Serializer.DeserializeTrie</c>
/// and checks that every word is found both before and after the round trip.
/// </summary>
public void Can_deserialize_whole_file()
{
    var dir = CreateDir();
    var path = Path.Combine(dir, "MappedTrieReaderTests.Can_deserialize_whole_file.tri");
    var terms = new[]
    {
        "baby", "bad", "badness", "bank", "box", "dad",
        "dance", "flower", "flowers", "globe", "global"
    };

    var original = new LcrsTrie('\0', false);
    foreach (var term in terms)
    {
        original.Add(term);
    }

    // Give every end-of-word node a placeholder postings address before serializing.
    foreach (var leaf in original.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    foreach (var term in terms)
    {
        Assert.IsTrue(original.IsWord(term).Any());
    }

    original.Serialize(path);
    // Dump a visualization of the trie next to the test run for debugging.
    File.WriteAllText("Can_deserialize_whole_file.log", original.Visualize(), System.Text.Encoding.UTF8);

    // DeserializeTrie takes the directory and the bare file name separately.
    var restored = Serializer.DeserializeTrie(dir, new FileInfo(path).Name);
    foreach (var term in terms)
    {
        Assert.IsTrue(restored.IsWord(term).Any());
    }
}
/// <summary>
/// Serializes a growing trie to the same file three times and checks, with a
/// fresh <c>MappedTrieReader</c> per lookup, that <c>HasWord</c> reflects exactly
/// the words added so far.
/// </summary>
public void Can_find_exact()
{
    var path = Path.Combine(Dir, "Can_find_exact_mm.tri");
    var trie = new LcrsTrie('\0', false);
    Word match;

    // Stage 1: neither "baby" nor "dad" is present.
    foreach (var term in new[] { "xor", "xxx", "donkey", "xavier" })
    {
        trie.Add(term);
    }
    trie.Serialize(path);

    using (var reader = new MappedTrieReader(path))
    {
        Assert.IsTrue(reader.HasWord("xxx", out match));
    }
    foreach (var missing in new[] { "baby", "dad" })
    {
        using (var reader = new MappedTrieReader(path))
        {
            Assert.IsFalse(reader.HasWord(missing, out match));
        }
    }

    // Stage 2: "baby" added, "dad" still missing.
    trie.Add("baby");
    trie.Serialize(path);

    foreach (var present in new[] { "xxx", "baby" })
    {
        using (var reader = new MappedTrieReader(path))
        {
            Assert.IsTrue(reader.HasWord(present, out match));
        }
    }
    using (var reader = new MappedTrieReader(path))
    {
        Assert.IsFalse(reader.HasWord("dad", out match));
    }

    // Stage 3: "dad" and "daddy" added; everything is found.
    trie.Add("dad");
    trie.Add("daddy");
    trie.Serialize(path);

    foreach (var present in new[] { "xxx", "baby", "dad", "daddy" })
    {
        using (var reader = new MappedTrieReader(path))
        {
            Assert.IsTrue(reader.HasWord(present, out match));
        }
    }
}
/// <summary>
/// Exercises <c>MappedTrieReader.Near</c> against a trie that is re-serialized
/// to the same file after each insertion, checking which words are returned for
/// the queries "ba" and "bazy" at several distance arguments.
/// </summary>
public void Can_find_near()
{
    var path = Path.Combine(Dir, "Can_find_near.tri");
    var trie = new LcrsTrie('\0', false);

    // Empty trie yields no matches.
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(0, hits.Count);
    }

    trie.Add("bad");
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("bad"));
    }

    // "baby" is not returned for "ba" with the argument 1.
    trie.Add("baby");
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("bad"));
    }

    trie.Add("b");
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(2, hits.Count);
        foreach (var expected in new[] { "bad", "b" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 2).Select(w => w.Value).ToList();
        Assert.AreEqual(3, hits.Count);
        foreach (var expected in new[] { "b", "bad", "baby" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    // With the argument 0 nothing is returned ("ba" itself was never added).
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 0).Select(w => w.Value).ToList();
        Assert.AreEqual(0, hits.Count);
    }

    trie.Add("bananas");
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("ba", 6).Select(w => w.Value).ToList();
        Assert.AreEqual(4, hits.Count);
        foreach (var expected in new[] { "b", "bad", "baby", "bananas" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("bazy", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("baby"));
    }

    trie.Add("bank");
    trie.Serialize(path);
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.Near("bazy", 3).Select(w => w.Value).ToList();
        Assert.AreEqual(4, hits.Count);
        foreach (var expected in new[] { "baby", "bank", "bad", "b" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }
}
/// <summary>
/// Serializes a trie to disk and checks that <c>MappedTrieReader.StartsWith("ra")</c>
/// returns exactly "rambo", "raiders" and "rain", even though some words were
/// added more than once.
/// </summary>
public void Can_find_prefixed()
{
    var fileName = Path.Combine(Dir, "Can_find_prefixed.tri");
    var tree = new LcrsTrie('\0', false);

    // "rambo" and "2" are deliberately added twice.
    tree.Add("rambo");
    tree.Add("rambo");
    tree.Add("2");
    tree.Add("rocky");
    tree.Add("2");
    tree.Add("raiders");
    tree.Add("of");
    tree.Add("the");
    tree.Add("lost");
    tree.Add("ark");
    tree.Add("rain");
    tree.Add("man");

    tree.Serialize(fileName);

    // Dispose the reader so the file is released; ToList() materializes the
    // results before the reader goes out of scope.
    using (var reader = new MappedTrieReader(fileName))
    {
        var prefixed = reader.StartsWith("ra").Select(w => w.Value).ToList();

        Assert.AreEqual(3, prefixed.Count);
        Assert.IsTrue(prefixed.Contains("rambo"));
        Assert.IsTrue(prefixed.Contains("raiders"));
        Assert.IsTrue(prefixed.Contains("rain"));
    }
}
/// <summary>
/// Checks that <c>StartsWith("ra")</c> on an in-memory trie returns exactly
/// "rambo", "raiders" and "rain", even though some words were added more than once.
/// </summary>
public void Can_find_prefixed()
{
    var trie = new LcrsTrie('\0', false);

    // "rambo" and "2" each appear twice in the input.
    var inserted = new[]
    {
        "rambo", "rambo", "2", "rocky", "2", "raiders",
        "of", "the", "lost", "ark", "rain", "man"
    };
    foreach (var term in inserted)
    {
        trie.Add(term);
    }

    var matches = trie.StartsWith("ra").Select(w => w.Value).ToList();

    Assert.That(matches.Count, Is.EqualTo(3));
    foreach (var expected in new[] { "rambo", "raiders", "rain" })
    {
        Assert.IsTrue(matches.Contains(expected));
    }
}
/// <summary>
/// Serializes a trie to disk and checks that <c>MappedTrieReader.StartsWith("ra")</c>
/// returns exactly "rambo", "raiders" and "rain", even though some words were
/// added more than once.
/// </summary>
public void Can_find_prefixed()
{
    var fileName = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_prefixed.tri");
    var tree = new LcrsTrie('\0', false);

    // "rambo" and "2" are deliberately added twice.
    tree.Add("rambo");
    tree.Add("rambo");
    tree.Add("2");
    tree.Add("rocky");
    tree.Add("2");
    tree.Add("raiders");
    tree.Add("of");
    tree.Add("the");
    tree.Add("lost");
    tree.Add("ark");
    tree.Add("rain");
    tree.Add("man");

    // Give every end-of-word node a placeholder postings address before serializing.
    foreach (var node in tree.EndOfWordNodes())
    {
        node.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }

    tree.Serialize(fileName);

    // Dispose the reader so the file is released; ToList() materializes the
    // results before the reader goes out of scope.
    using (var reader = new MappedTrieReader(fileName))
    {
        var prefixed = reader.StartsWith("ra").Select(w => w.Value).ToList();

        Assert.AreEqual(3, prefixed.Count);
        Assert.IsTrue(prefixed.Contains("rambo"));
        Assert.IsTrue(prefixed.Contains("raiders"));
        Assert.IsTrue(prefixed.Contains("rain"));
    }
}
/// <summary>
/// Exercises <c>MappedTrieReader.SemanticallyNear</c> against a file that receives
/// a new single-word trie before each group of lookups, checking which words are
/// returned for the queries "ba" and "bazy" at several distance arguments.
/// </summary>
/// <remarks>
/// NOTE(review): each step serializes a brand-new one-word trie to the same file
/// yet expects earlier words to still be found, so <c>Serialize</c> presumably
/// appends rather than overwrites — confirm.
/// </remarks>
public void Can_find_near()
{
    var path = Path.Combine(CreateDir(), "MappedTrieReaderTests.Can_find_near.tri");

    var trie = new LcrsTrie();
    trie.Add("bad");
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }
    trie.Serialize(path);

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("bad"));
    }

    trie = new LcrsTrie();
    trie.Add("baby");
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }
    trie.Serialize(path);
    // Dump a visualization of the latest trie for debugging.
    File.WriteAllText("Can_find_near.log", trie.Visualize(), System.Text.Encoding.UTF8);

    // "baby" is not returned for "ba" with the argument 1.
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("bad"));
    }

    trie = new LcrsTrie();
    trie.Add("b");
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }
    trie.Serialize(path);
    File.WriteAllText("Can_find_near.log", trie.Visualize(), System.Text.Encoding.UTF8);

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(2, hits.Count);
        foreach (var expected in new[] { "bad", "b" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 2).Select(w => w.Value).ToList();
        Assert.AreEqual(3, hits.Count);
        foreach (var expected in new[] { "b", "bad", "baby" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    // With the argument 0 nothing is returned ("ba" itself was never added).
    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 0).Select(w => w.Value).ToList();
        Assert.AreEqual(0, hits.Count);
    }

    trie = new LcrsTrie();
    trie.Add("bananas");
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }
    trie.Serialize(path);

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("ba", 6).Select(w => w.Value).ToList();
        Assert.AreEqual(4, hits.Count);
        foreach (var expected in new[] { "b", "bad", "baby", "bananas" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("bazy", 1).Select(w => w.Value).ToList();
        Assert.AreEqual(1, hits.Count);
        Assert.IsTrue(hits.Contains("baby"));
    }

    trie = new LcrsTrie();
    trie.Add("bank");
    foreach (var leaf in trie.EndOfWordNodes())
    {
        leaf.PostingsAddress = new BlockInfo(long.MinValue, int.MinValue);
    }
    trie.Serialize(path);

    using (var reader = new MappedTrieReader(path))
    {
        var hits = reader.SemanticallyNear("bazy", 3).Select(w => w.Value).ToList();
        Assert.AreEqual(4, hits.Count);
        foreach (var expected in new[] { "baby", "bank", "bad", "b" })
        {
            Assert.IsTrue(hits.Contains(expected));
        }
    }
}