/// <summary>
/// Makes sure <c>_tries</c> contains an entry for <paramref name="key"/>,
/// registering a fresh, empty <see cref="LcrsTrie"/> when none exists yet.
/// </summary>
/// <param name="key">Key under which the trie is stored in <c>_tries</c>.</param>
private void InitTrie(string key)
{
    if (_tries.ContainsKey(key))
    {
        // A trie is already registered under this key; leave it untouched.
        return;
    }

    _tries[key] = new LcrsTrie();
}
/// <summary>
/// Writes <paramref name="trie"/> and everything reachable from it to
/// <paramref name="stream"/> without recursion. Each node is written before
/// its left-child subtree, and that subtree before the node's right sibling.
/// </summary>
/// <param name="trie">Node to start from; it is written with depth 0.</param>
/// <param name="stream">Destination stream handed to <c>LcrsNode.Serialize</c>.</param>
private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream)
{
    // Siblings that still have to be written once the current child chain is exhausted.
    var pendingSiblings = new Stack<LcrsNode>();
    var current = new LcrsNode(trie, 0, trie.Weight, trie.PostingsAddress);

    while (current != null)
    {
        current.Serialize(stream);

        var sibling = current.Tree.RightSibling;
        if (sibling != null)
        {
            // A sibling lives on the same level as the node just written.
            pendingSiblings.Push(
                new LcrsNode(sibling, current.Depth, sibling.Weight, sibling.PostingsAddress));
        }

        var child = current.Tree.LeftChild;
        if (child != null)
        {
            // Descend first; children sit one level deeper.
            current = new LcrsNode(
                child, (short)(current.Depth + 1), child.Weight, child.PostingsAddress);
            continue;
        }

        if (pendingSiblings.Count == 0)
        {
            break;
        }

        current = pendingSiblings.Pop();
    }
}
/// <summary>
/// Inserts <paramref name="path"/> below this node, one character per level.
/// A new child is put at the head of the child list; an existing child is
/// reused and marked as end-of-word when the path ends on it.
/// </summary>
/// <param name="path">Remaining characters to insert.</param>
/// <exception cref="ArgumentException">
/// <paramref name="path"/> is null, empty or consists only of white space.
/// </exception>
public void Add(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        // Use the (message, paramName) overload so the exception names the real parameter.
        throw new ArgumentException("path must not be null, empty or white space", "path");
    }

    var key = path[0];
    var eow = path.Length == 1;

    LcrsTrie node;
    if (!TryGetChild(key, out node))
    {
        // Prepend the new child: it becomes LeftChild and the old head becomes its sibling.
        node = new LcrsTrie(key, eow);
        node.RightSibling = LeftChild;
        LeftChild = node;
    }
    else if (eow)
    {
        // Never clear an end-of-word flag that an earlier insert has set.
        node.EndOfWord = true;
    }

    if (!eow)
    {
        node.Add(path.Substring(1));
    }
}
/// <summary>
/// Reads every file in <paramref name="directory"/> that matches
/// <paramref name="searchPattern"/> and chains the resulting tries together
/// under a new root: the first becomes the root's left child and each
/// subsequent one the right sibling of its predecessor.
/// </summary>
/// <param name="directory">Directory to search.</param>
/// <param name="searchPattern">Pattern passed to <c>Directory.GetFiles</c>.</param>
/// <returns>The new root; it has no left child when no file matched.</returns>
public static LcrsTrie DeserializeTrie(string directory, string searchPattern)
{
    var root = new LcrsTrie();
    LcrsTrie previous = null;

    // Files are processed in the order produced by sorting their paths.
    var orderedFiles = Directory.GetFiles(directory, searchPattern).OrderBy(f => f);

    foreach (var fileName in orderedFiles)
    {
        using (var reader = new MappedTrieReader(fileName))
        {
            var segment = reader.ReadWholeFile();

            if (previous != null)
            {
                previous.RightSibling = segment;
            }
            else
            {
                root.LeftChild = segment;
            }

            previous = segment;
        }
    }

    return root;
}
/// <summary>
/// Serializes <paramref name="trie"/> into the file <paramref name="fileName"/>.
/// The file is opened with <c>FileMode.Create</c> and exclusive write access,
/// then the stream-based <c>Serialize</c> overload does the writing.
/// </summary>
/// <param name="trie">Trie to write.</param>
/// <param name="fileName">Path of the target file.</param>
public static void Serialize(this LcrsTrie trie, string fileName)
{
    using (Stream output = new FileStream(
        fileName,
        FileMode.Create,
        FileAccess.Write,
        FileShare.None))
    {
        trie.Serialize(output);
    }
}
/// <summary>
/// Writes the children of <paramref name="trie"/> depth-first to
/// <paramref name="treeStream"/>, followed by <c>LcrsNode.MinValue</c>.
/// Nothing at all is written when the trie has no left child.
/// </summary>
/// <param name="trie">Root whose descendants are written; the root itself is not.</param>
/// <param name="treeStream">Destination stream.</param>
public static void Serialize(this LcrsTrie trie, Stream treeStream)
{
    var firstChild = trie.LeftChild;
    if (firstChild == null)
    {
        return;
    }

    firstChild.SerializeDepthFirst(treeStream, 0);

    // MinValue is written right after the last node of the tree.
    LcrsNode.MinValue.Serialize(treeStream);
}
/// <summary>
/// Creates a node from <paramref name="trie"/>: the character and the
/// end-of-word flag are copied, and the child and sibling flags are derived
/// from whether the corresponding links are set.
/// </summary>
/// <param name="trie">Trie node to describe.</param>
/// <param name="depth">Depth to record for the node.</param>
/// <param name="weight">Weight to record for the node.</param>
/// <param name="postingsAddress">Postings address to record; may be null.</param>
public LcrsNode(LcrsTrie trie, short depth, int weight, BlockInfo? postingsAddress)
{
    // Values supplied directly by the caller.
    Depth = depth;
    Weight = weight;
    PostingsAddress = postingsAddress;

    // Values taken from the trie node itself.
    Value = trie.Value;
    EndOfWord = trie.EndOfWord;
    HaveChild = trie.LeftChild != null;
    HaveSibling = trie.RightSibling != null;
}
/// <summary>
/// Creates (or overwrites) <paramref name="fileName"/> and writes the
/// children of <paramref name="trie"/> into it depth-first. The file is
/// left empty when the trie has no left child.
/// </summary>
/// <param name="trie">Root whose descendants are written.</param>
/// <param name="fileName">Path of the target file.</param>
public static void Serialize(this LcrsTrie trie, string fileName)
{
    using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        var firstChild = trie.LeftChild;
        if (firstChild == null)
        {
            return;
        }

        // Start at depth 0 with a node count of 0.
        firstChild.SerializeDepthFirst(output, 0, 0);
    }
}
/// <summary>
/// Creates a node from explicit values. No trie is attached:
/// <c>Tree</c> is set to null.
/// </summary>
/// <param name="value">Character stored in the node.</param>
/// <param name="haveSibling">Value for <c>HaveSibling</c>.</param>
/// <param name="haveChild">Value for <c>HaveChild</c>.</param>
/// <param name="endOfWord">Value for <c>EndOfWord</c>.</param>
/// <param name="depth">Depth to record for the node.</param>
/// <param name="weight">Weight to record for the node.</param>
/// <param name="postingsAddress">Postings address to record; may be null.</param>
public LcrsNode(
    char value,
    bool haveSibling,
    bool haveChild,
    bool endOfWord,
    short depth,
    int weight,
    BlockInfo? postingsAddress)
{
    // No backing trie for a node built from raw values.
    Tree = null;

    Value = value;
    Depth = depth;
    Weight = weight;

    HaveChild = haveChild;
    HaveSibling = haveSibling;
    EndOfWord = endOfWord;

    PostingsAddress = postingsAddress;
}
/// <summary>
/// Appends a text rendering of <paramref name="node"/>, its descendants and
/// its right siblings to <paramref name="output"/>. Each node gets its own
/// line, indented with one tab per level.
/// </summary>
/// <param name="node">First node to render; null renders nothing.</param>
/// <param name="output">Builder that receives the text.</param>
/// <param name="depth">Number of tabs to indent <paramref name="node"/> and its siblings.</param>
private void Visualize(LcrsTrie node, StringBuilder output, int depth)
{
    // Siblings share a level, so they are walked in a loop; only children recurse.
    for (var current = node; current != null; current = current.RightSibling)
    {
        output.Append('\t', depth)
              .Append(current.Value.ToString())
              .Append(" ")
              .AppendLine();

        Visualize(current.LeftChild, output, depth + 1);
    }
}
/// <summary>
/// Adds every word of <paramref name="other"/>, together with the postings
/// stored on its end-of-word node, to this trie.
/// </summary>
/// <param name="other">Trie to merge in. An empty trie is a no-op.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void Merge(LcrsTrie other)
{
    if (other == null)
    {
        throw new ArgumentNullException("other");
    }

    if (other.LeftChild == null)
    {
        // Nothing stored below the other root, so there is nothing to merge.
        return;
    }

    var words = new List<Word>();
    other.LeftChild.DepthFirst(string.Empty, new List<char>(), words);

    var nodes = other.LeftChild.EndOfWordNodes().ToArray();

    // NOTE(review): pairing by index assumes DepthFirst and EndOfWordNodes
    // enumerate the end-of-word nodes in the same order — confirm.
    for (int index = 0; index < nodes.Length; index++)
    {
        Add(words[index].Value, nodes[index].Postings.ToArray());
    }
}
/// <summary>
/// Returns the trie stored in <c>_tries</c> under the hash of
/// <paramref name="key"/>, creating and registering an empty one first
/// when there is none.
/// </summary>
/// <param name="key">Unhashed key; it is hashed with <c>ToHash()</c> before lookup.</param>
/// <returns>The existing or newly created trie.</returns>
private LcrsTrie GetTree(string key)
{
    var hashedKey = key.ToHash();

    LcrsTrie existing;
    if (_tries.TryGetValue(hashedKey, out existing))
    {
        return existing;
    }

    var created = new LcrsTrie();
    _tries.Add(hashedKey, created);
    return created;
}
/// <summary>
/// Writes <paramref name="trie"/>, its descendants and its right siblings
/// to <paramref name="treeStream"/>. A node is written first, then its
/// left-child subtree, then its right sibling.
/// </summary>
/// <param name="trie">First node to write.</param>
/// <param name="treeStream">Destination stream.</param>
/// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
private static void SerializeDepthFirst(
    this LcrsTrie trie, Stream treeStream, short depth)
{
    // All children of this sibling chain live exactly one level deeper.
    var childDepth = (short)(depth + 1);
    var current = trie;

    // Siblings are walked iteratively; only the descent into children recurses.
    do
    {
        var record = new LcrsNode(current, depth, current.Weight, current.PostingsAddress);
        record.Serialize(treeStream);

        var child = current.LeftChild;
        if (child != null)
        {
            child.SerializeDepthFirst(treeStream, childDepth);
        }

        current = current.RightSibling;
    }
    while (current != null);
}
/// <summary>
/// Adds the token of <paramref name="word"/> and its posting to the trie
/// that belongs to the word's field. Tries are keyed by the string form of
/// the field's hash and created on first use. Starts <c>_timer</c> first.
/// </summary>
/// <param name="word">Supplies <c>Field</c>, <c>Token</c> and <c>Posting</c>.</param>
public void Add(WordInfo word)
{
    _timer.Start();

    var fieldKey = word.Field.ToHash().ToString();

    LcrsTrie fieldTrie;
    var known = _tries.TryGetValue(fieldKey, out fieldTrie);
    if (!known)
    {
        fieldTrie = new LcrsTrie();
        _tries.Add(fieldKey, fieldTrie);
    }

    fieldTrie.Add(word.Token, word.Posting);
}
/// <summary>
/// Adds <paramref name="value"/> with its <paramref name="posting"/> to the
/// trie registered under the hash of <paramref name="key"/>, creating that
/// trie on first use. Starts <c>_timer</c> first.
/// </summary>
/// <param name="key">Unhashed key identifying the trie.</param>
/// <param name="value">Word to insert; insertion starts at character index 0.</param>
/// <param name="posting">Posting stored with the word.</param>
public void Add(string key, string value, DocumentPosting posting)
{
    _timer.Start();

    var hashedKey = key.ToHash();

    LcrsTrie target;
    if (_tries.TryGetValue(hashedKey, out target) == false)
    {
        target = new LcrsTrie();
        _tries.Add(hashedKey, target);
    }

    target.Add(value, 0, posting);
}
/// <summary>
/// Writes the byte form of <paramref name="trie"/>, of its descendants and
/// of its right siblings to <paramref name="stream"/>. A node is written
/// first, then its left-child subtree, then its right sibling.
/// </summary>
/// <param name="trie">First node to write.</param>
/// <param name="stream">Destination stream.</param>
/// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream, short depth)
{
    var childDepth = (short)(depth + 1);
    var current = trie;

    // Siblings are walked iteratively; only the descent into children recurses.
    do
    {
        var record = new LcrsNode(current, depth, current.Weight, current.PostingsAddress);
        var payload = record.Serialize();
        stream.Write(payload, 0, payload.Length);

        var child = current.LeftChild;
        if (child != null)
        {
            child.SerializeDepthFirst(stream, childDepth);
        }

        current = current.RightSibling;
    }
    while (current != null);
}
/// <summary>
/// Searches the direct children of this node (the left child and its chain
/// of right siblings) for one whose value equals <paramref name="c"/>.
/// </summary>
/// <param name="c">Character to look for.</param>
/// <param name="node">The matching child, or null when there is none.</param>
/// <returns>True when a matching child was found.</returns>
private bool TryGetChild(char c, out LcrsTrie node)
{
    for (node = LeftChild; node != null; node = node.RightSibling)
    {
        if (node.Value == c)
        {
            return true;
        }
    }

    // The chain was exhausted, so node is null at this point.
    return false;
}
/// <summary>
/// Relinks the nodes in <paramref name="arr"/> between <paramref name="start"/>
/// and <paramref name="end"/> (inclusive): the middle element becomes the
/// top, the lower half hangs off its <c>LeftChild</c> and the upper half off
/// its <c>RightSibling</c>, recursively.
/// </summary>
/// <param name="arr">Nodes to relink; their links are overwritten.</param>
/// <param name="start">First index of the range.</param>
/// <param name="end">Last index of the range.</param>
/// <returns>The top node of the range, or null for an empty range.</returns>
private LcrsTrie Balance(LcrsTrie[] arr, int start, int end)
{
    if (end < start)
    {
        return null;
    }

    var middle = (start + end) / 2;
    var pivot = arr[middle];

    pivot.LeftChild = Balance(arr, start, middle - 1);
    pivot.RightSibling = Balance(arr, middle + 1, end);

    return pivot;
}
/// <summary>
/// Appends <paramref name="trie"/> as a new segment to the tree file
/// <paramref name="fileName"/> and records the segment's start position in a
/// companion index file with the same base name and the extension ".six".
/// </summary>
/// <remarks>
/// When the tree file already exists, a segment delimiter node is written
/// before the new segment. The start position is stored as eight bytes,
/// least significant byte first.
/// </remarks>
/// <param name="trie">Root whose descendants are written; the root itself is not.</param>
/// <param name="fileName">Path of the tree file.</param>
public static void Serialize(this LcrsTrie trie, string fileName)
{
    var dir = Path.GetDirectoryName(fileName);
    var version = Path.GetFileNameWithoutExtension(fileName);
    var sixFileName = Path.Combine(dir, version + ".six");

    using (var sixStream = new FileStream(sixFileName, FileMode.Append, FileAccess.Write, FileShare.Read))
    {
        var appendingToExisting = File.Exists(fileName);

        // Readers may share an existing tree file; a brand new file is opened exclusively.
        var treeShare = appendingToExisting ? FileShare.Read : FileShare.None;

        // Opened inside a using block so the handle is released even when
        // writing the delimiter or the index entry throws.
        using (var treeStream = new FileStream(fileName, FileMode.Append, FileAccess.Write, treeShare))
        {
            if (appendingToExisting)
            {
                var segmentDelimiter = new LcrsNode(SegmentDelimiter, false, false, false, 0, 1, null);
                segmentDelimiter.Serialize(treeStream);
            }

            // The segment starts after the delimiter, if one was written.
            var position = treeStream.Position;
            var posBytes = BitConverter.GetBytes(position);

            if (!BitConverter.IsLittleEndian)
            {
                // Keep the on-disk byte order independent of the machine.
                Array.Reverse(posBytes);
            }

            sixStream.Write(posBytes, 0, sizeof(long));

            if (trie.LeftChild != null)
            {
                trie.LeftChild.SerializeDepthFirst(treeStream, 0);
            }
        }
    }
}
/// <summary>
/// Follows <paramref name="path"/> from this node downwards, one character
/// per level, and reports the node reached by the last character.
/// </summary>
/// <param name="path">Characters to follow.</param>
/// <param name="leaf">
/// The node matching the last character of <paramref name="path"/>, or null
/// when the path is not present.
/// </param>
/// <returns>
/// True when the whole path exists; false otherwise, including for a null
/// or empty <paramref name="path"/>.
/// </returns>
public bool TryFindPath(char[] path, out LcrsTrie leaf)
{
    leaf = null;

    if (path == null || path.Length == 0)
    {
        // Nothing to look up. Without this guard path[0] would be read below
        // whenever the trie has at least one child.
        return false;
    }

    var node = LeftChild;
    var index = 0;

    // Scan right through the siblings for path[index]; on a match either
    // finish or step down to the next level.
    while (node != null)
    {
        if (node.Value.Equals(path[index]))
        {
            if (index + 1 == path.Length)
            {
                // destination has been reached
                leaf = node;
                return true;
            }

            // go deep when the current character has been found
            index++;
            node = node.LeftChild;
        }
        else
        {
            // go right while still looking for the current character
            node = node.RightSibling;
        }
    }

    return false;
}
/// <summary>
/// Writes <paramref name="trie"/>, its descendants and its right siblings
/// to <paramref name="stream"/>: the node first, then its left-child
/// subtree, then its right sibling. Stops descending, and logs a message,
/// once <paramref name="count"/> exceeds 100,000.
/// </summary>
/// <param name="trie">First node to write.</param>
/// <param name="stream">Destination stream.</param>
/// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
/// <param name="count">Running counter compared against the cut-off.</param>
private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream, short depth, int count)
{
    // NOTE(review): count is passed by value, so the child call and the
    // sibling call below both receive the same incremented value. The
    // counter therefore grows with the length of the call chain leading to
    // this node rather than with the total number of nodes written.
    // Confirm which of the two the cut-off is meant to limit.
    if (count++ > 1000 * 100)
    {
        Log.Info("cut off trie at 1000 * 100");
        return;
    }

    new LcrsNode(trie, depth, trie.Weight, trie.PostingsAddress).Serialize(stream);

    if (trie.LeftChild != null)
    {
        // Children sit one level below the current node.
        trie.LeftChild.SerializeDepthFirst(stream, (short)(depth + 1), count);
    }

    if (trie.RightSibling != null)
    {
        // Siblings share the current node's depth.
        trie.RightSibling.SerializeDepthFirst(stream, depth, count);
    }
}
/// <summary>
/// Relinks the nodes of <paramref name="arr"/> in the inclusive range
/// <paramref name="start"/>..<paramref name="end"/> around the middle
/// element: the lower half is attached as its <c>LeftChild</c>, the upper
/// half as its <c>RightSibling</c>, recursively.
/// </summary>
/// <param name="arr">Nodes to relink; their links are overwritten.</param>
/// <param name="start">First index of the range.</param>
/// <param name="end">Last index of the range.</param>
/// <returns>The middle node of the range, or null for an empty range.</returns>
private LcrsTrie Balance(LcrsTrie[] arr, int start, int end)
{
    // this will distort the tree
    // TODO: balance a sorted list of strings instead of a list of nodes
    if (start <= end)
    {
        int centre = (start + end) / 2;
        LcrsTrie top = arr[centre];

        top.LeftChild = Balance(arr, start, centre - 1);
        top.RightSibling = Balance(arr, centre + 1, end);

        return top;
    }

    return null;
}
/// <summary>
/// Follows <paramref name="path"/> from this node downwards, one character
/// per level, and reports the node reached by the last character.
/// </summary>
/// <param name="path">Characters to follow.</param>
/// <param name="leaf">
/// The node matching the last character of <paramref name="path"/>, or null
/// when the path is not present.
/// </param>
/// <returns>
/// True when the whole path exists; false otherwise, including for a null
/// or empty <paramref name="path"/>.
/// </returns>
public bool TryFindPath(string path, out LcrsTrie leaf)
{
    leaf = null;

    if (string.IsNullOrEmpty(path))
    {
        // Nothing to look up. Without this guard path[0] would be read below
        // whenever the trie has at least one child.
        return false;
    }

    // Walk the path by index rather than recursing on Substring(1), which
    // would allocate a new string for every level.
    var node = LeftChild;
    var index = 0;

    while (node != null)
    {
        if (node.Value.Equals(path[index]))
        {
            if (index + 1 == path.Length)
            {
                leaf = node;
                return true;
            }

            // Matched this level; continue with the next character one level down.
            index++;
            node = node.LeftChild;
        }
        else
        {
            // Not this sibling; keep scanning the same level.
            node = node.RightSibling;
        }
    }

    return false;
}
/// <summary>
/// Inserts the character <c>word[index]</c> below this node and recurses
/// for the remaining characters. On the last character the node is marked
/// as end-of-word and <paramref name="term"/> is serialized into the node's
/// postings stream.
/// </summary>
/// <param name="word">Complete word being inserted.</param>
/// <param name="index">Position in <paramref name="word"/> handled by this call.</param>
/// <param name="term">Term serialized into the postings stream of the end-of-word node.</param>
/// <exception cref="ArgumentException">
/// <paramref name="word"/> is null, empty or consists only of white space.
/// </exception>
public void Add(string word, int index, AnalyzedTerm term)
{
    if (string.IsNullOrWhiteSpace(word))
    {
        throw new ArgumentException("word");
    }

    // Nothing left to insert.
    if (index == word.Length)
    {
        return;
    }

    var key = word[index];
    var eow = word.Length == index + 1;

    LcrsTrie node;
    if (!TryGetChild(key, out node))
    {
        node = new LcrsTrie(key, eow);
        if (LeftChild == null)
        {
            LeftChild = node;
        }
        else
        {
            // place new node in lexical order
            if (LeftChild.Value > node.Value)
            {
                // The new node sorts before the current head, so it becomes the head.
                var tmp = LeftChild;
                LeftChild = node;
                node.RightSibling = tmp;
            }
            else
            {
                // Find the sibling after which the new node belongs: the one
                // with a smaller value whose successor is missing or larger.
                var sibling = LeftChild;
                while (true)
                {
                    if (sibling.Value < node.Value && (sibling.RightSibling == null || sibling.RightSibling.Value > node.Value))
                    {
                        break;
                    }
                    sibling = sibling.RightSibling;
                }

                // Splice the new node in between sibling and its old successor.
                var rightSibling = sibling.RightSibling;
                sibling.RightSibling = node;
                node.RightSibling = rightSibling;
            }
        }
    }

    if (eow)
    {
        node.EndOfWord = true;

        // Postings start out in memory.
        if (node.PostingsStream == null)
        {
            node.PostingsStream = new MemoryStream();
        }

        term.Serialize(node.PostingsStream);

        if (node.Size > 100000 && node.WriteToDisk == false)
        {
            // Size passed the threshold: move the postings collected so far
            // into a randomly named file in the current directory. The file
            // is opened with DeleteOnClose.
            var fn = Path.Combine(Directory.GetCurrentDirectory(), Path.GetRandomFileName());
            var fs = new FileStream(fn, FileMode.Create, FileAccess.ReadWrite, FileShare.None, 4096, FileOptions.DeleteOnClose);

            // Rewind so that CopyTo transfers the stream from its beginning.
            node.PostingsStream.Position = 0;
            node.PostingsStream.CopyTo(fs);
            node.PostingsStream.Dispose();

            // NOTE(review): fs is not disposed if CopyTo throws — confirm whether that matters.
            node.PostingsStream = fs;
            node.WriteToDisk = true;
        }
        else if (node.WriteToDisk == false)
        {
            // Size is only tracked while the postings are not yet on disk.
            node.Size += term.Positions.Count;
        }
    }
    else
    {
        node.Add(word, index + 1, term);
    }
}
/// <summary>
/// Inserts <paramref name="word"/> below this node, one character per
/// level, keeping new children in ascending character order. The node of
/// the last character receives <paramref name="postings"/>.
/// </summary>
/// <param name="word">Remaining characters to insert.</param>
/// <param name="postings">Postings appended to the end-of-word node.</param>
/// <exception cref="ArgumentException">
/// <paramref name="word"/> is null, empty or consists only of white space.
/// </exception>
public void Add(string word, params DocumentPosting[] postings)
{
    if (string.IsNullOrWhiteSpace(word))
    {
        throw new ArgumentException("word");
    }

    var first = word[0];
    var isLast = word.Length == 1;

    LcrsTrie child;
    if (TryGetChild(first, out child))
    {
        if (isLast)
        {
            // An existing node becomes (or is again) the end of a word.
            child.EndOfWord = true;
            child.WordCount++;
        }
    }
    else
    {
        child = new LcrsTrie(first, isLast);

        if (LeftChild == null)
        {
            LeftChild = child;
        }
        else if (LeftChild.Value > child.Value)
        {
            // Sorts before the current head, so the new node becomes the head.
            child.RightSibling = LeftChild;
            LeftChild = child;
        }
        else
        {
            // Advance until the predecessor is smaller than the new node and
            // is either the last sibling or followed by a larger one.
            var predecessor = LeftChild;
            while (predecessor.Value >= child.Value
                || (predecessor.RightSibling != null && predecessor.RightSibling.Value <= child.Value))
            {
                predecessor = predecessor.RightSibling;
            }

            child.RightSibling = predecessor.RightSibling;
            predecessor.RightSibling = child;
        }
    }

    if (!isLast)
    {
        // More characters to go: continue one level down.
        child.Add(word.Substring(1), postings);
        return;
    }

    if (child.Postings == null)
    {
        child.Postings = new List<DocumentPosting>();
    }

    foreach (var entry in postings)
    {
        child.Postings.Add(entry);
    }
}