Beispiel #1
0
 /// <summary>
 /// Makes sure a trie is registered in <c>_tries</c> under <paramref name="key"/>,
 /// creating an empty one when the key is not present yet.
 /// </summary>
 /// <param name="key">Key under which the trie is stored.</param>
 private void InitTrie(string key)
 {
     if (_tries.ContainsKey(key))
     {
         // A trie already exists for this key; keep it untouched.
         return;
     }

     _tries[key] = new LcrsTrie();
 }
Beispiel #2
0
        /// <summary>
        /// Writes <paramref name="trie"/> and every node reachable from it to
        /// <paramref name="stream"/> without recursion. Each node is written before
        /// its left-child subtree, which in turn is written before its right siblings.
        /// </summary>
        /// <param name="trie">Node to start from; it is written with depth 0.</param>
        /// <param name="stream">Destination stream handed to <c>LcrsNode.Serialize</c>.</param>
        private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream)
        {
            var pending = new Stack<LcrsNode>();

            pending.Push(new LcrsNode(trie, 0, trie.Weight, trie.PostingsAddress));

            while (pending.Count > 0)
            {
                var current = pending.Pop();

                current.Serialize(stream);

                // The sibling goes onto the stack first so that it is only
                // visited once the whole child subtree has been written.
                var sibling = current.Tree.RightSibling;

                if (sibling != null)
                {
                    pending.Push(new LcrsNode(
                                     sibling, current.Depth, sibling.Weight, sibling.PostingsAddress));
                }

                // The child is pushed last and therefore popped next.
                var child = current.Tree.LeftChild;

                if (child != null)
                {
                    pending.Push(new LcrsNode(
                                     child, (short)(current.Depth + 1), child.Weight, child.PostingsAddress));
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Inserts <paramref name="path"/> into the trie, one node per character,
        /// and marks the node of the last character as end-of-word.
        /// </summary>
        /// <param name="path">
        /// The character sequence to insert. Must be non-empty and must not end
        /// with a white space character.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="path"/> is null, empty, or ends with white space.
        /// </exception>
        public void Add(string path)
        {
            // Validate once, before touching the trie. The previous recursive form
            // re-ran the white space check on every remaining suffix, so a path with
            // trailing white space threw only after its prefix had been inserted.
            // The set of rejected inputs is unchanged.
            if (string.IsNullOrEmpty(path) || char.IsWhiteSpace(path[path.Length - 1]))
            {
                throw new ArgumentException(
                    "Path must be non-empty and must not end with white space.", "path");
            }

            var parent = this;

            for (var index = 0; index < path.Length; index++)
            {
                var key = path[index];
                var eow = index == path.Length - 1;

                LcrsTrie node;

                if (!parent.TryGetChild(key, out node))
                {
                    // New children are prepended to the sibling list.
                    node = new LcrsTrie(key, eow);
                    node.RightSibling = parent.LeftChild;
                    parent.LeftChild  = node;
                }
                else if (!node.EndOfWord)
                {
                    // Never clear an existing end-of-word flag.
                    node.EndOfWord = eow;
                }

                parent = node;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Reads every file in <paramref name="directory"/> that matches
        /// <paramref name="searchPattern"/>, in ascending file-name order, and links
        /// the tries read from them under a new root: the first becomes the root's
        /// left child, each following one the right sibling of the one before it.
        /// </summary>
        /// <param name="directory">Directory to search for trie files.</param>
        /// <param name="searchPattern">Pattern passed to <c>Directory.GetFiles</c>.</param>
        /// <returns>The new root; it has no children when no file matched.</returns>
        public static LcrsTrie DeserializeTrie(string directory, string searchPattern)
        {
            var      result   = new LcrsTrie();
            LcrsTrie previous = null;

            var orderedFiles = Directory.GetFiles(directory, searchPattern).OrderBy(f => f);

            foreach (var file in orderedFiles)
            {
                using (var reader = new MappedTrieReader(file))
                {
                    var segment = reader.ReadWholeFile();

                    if (previous != null)
                    {
                        previous.RightSibling = segment;
                    }
                    else
                    {
                        // First file: hang it directly under the root.
                        result.LeftChild = segment;
                    }

                    previous = segment;
                }
            }

            return result;
        }
Beispiel #5
0
 /// <summary>
 /// Serializes <paramref name="trie"/> into the file <paramref name="fileName"/>.
 /// The file is opened with <c>FileMode.Create</c> for exclusive write access and
 /// closed again when serialization finishes or fails.
 /// </summary>
 /// <param name="trie">The trie to serialize.</param>
 /// <param name="fileName">Path of the destination file.</param>
 public static void Serialize(this LcrsTrie trie, string fileName)
 {
     using (var output = new FileStream(
                path: fileName,
                mode: FileMode.Create,
                access: FileAccess.Write,
                share: FileShare.None))
     {
         trie.Serialize(output);
     }
 }
Beispiel #6
0
 /// <summary>
 /// Serializes the children of <paramref name="trie"/> depth-first into
 /// <paramref name="treeStream"/>, followed by <c>LcrsNode.MinValue</c>.
 /// Nothing at all is written when the trie has no left child.
 /// </summary>
 /// <param name="trie">Root whose children are written; the root itself is not.</param>
 /// <param name="treeStream">Destination stream.</param>
 public static void Serialize(this LcrsTrie trie, Stream treeStream)
 {
     var firstChild = trie.LeftChild;

     if (firstChild == null)
     {
         return;
     }

     firstChild.SerializeDepthFirst(treeStream, 0);

     // Written after the last node of the trie.
     LcrsNode.MinValue.Serialize(treeStream);
 }
Beispiel #7
0
 /// <summary>
 /// Creates a node record from an in-memory trie node.
 /// </summary>
 /// <param name="trie">Trie node supplying the value, end-of-word flag and child/sibling presence.</param>
 /// <param name="depth">Depth to record for the node.</param>
 /// <param name="weight">Weight to record for the node.</param>
 /// <param name="postingsAddress">Postings address to record; may be null.</param>
 public LcrsNode(LcrsTrie trie, short depth, int weight, BlockInfo? postingsAddress)
 {
     // Taken from the trie node itself.
     Value       = trie.Value;
     EndOfWord   = trie.EndOfWord;
     HaveChild   = trie.LeftChild != null;
     HaveSibling = trie.RightSibling != null;

     // Supplied by the caller.
     Depth           = depth;
     Weight          = weight;
     PostingsAddress = postingsAddress;
 }
Beispiel #8
0
 /// <summary>
 /// Serializes the children of <paramref name="trie"/> depth-first into the file
 /// <paramref name="fileName"/>. The file is opened with <c>FileMode.Create</c>
 /// even when the trie has no children, in which case nothing is written to it.
 /// </summary>
 /// <param name="trie">Root whose children are written; the root itself is not.</param>
 /// <param name="fileName">Path of the destination file.</param>
 public static void Serialize(this LcrsTrie trie, string fileName)
 {
     using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None))
     {
         var firstChild = trie.LeftChild;

         if (firstChild == null)
         {
             return;
         }

         // Start at depth 0 with a count of 0.
         firstChild.SerializeDepthFirst(output, 0, 0);
     }
 }
Beispiel #9
0
 /// <summary>
 /// Creates a node record directly from its field values. <c>Tree</c> is set to
 /// null because no in-memory trie node is supplied.
 /// </summary>
 /// <param name="value">Character stored in the node.</param>
 /// <param name="haveSibling">Whether the node has a right sibling.</param>
 /// <param name="haveChild">Whether the node has a left child.</param>
 /// <param name="endOfWord">Whether the node terminates a word.</param>
 /// <param name="depth">Depth to record for the node.</param>
 /// <param name="weight">Weight to record for the node.</param>
 /// <param name="postingsAddress">Postings address to record; may be null.</param>
 public LcrsNode(
     char value,
     bool haveSibling,
     bool haveChild,
     bool endOfWord,
     short depth,
     int weight,
     BlockInfo? postingsAddress)
 {
     this.Tree = null;

     this.Value     = value;
     this.EndOfWord = endOfWord;

     this.HaveSibling = haveSibling;
     this.HaveChild   = haveChild;

     this.Depth           = depth;
     this.Weight          = weight;
     this.PostingsAddress = postingsAddress;
 }
Beispiel #10
0
        /// <summary>
        /// Appends a text rendering of <paramref name="node"/>, its descendants and
        /// its right siblings to <paramref name="output"/>. Each node gets one line:
        /// <paramref name="depth"/> tab characters, the node's value and a space.
        /// Children are indented one level deeper than their parent.
        /// </summary>
        /// <param name="node">First node to render; null renders nothing.</param>
        /// <param name="output">Builder that receives the text.</param>
        /// <param name="depth">Indentation level of <paramref name="node"/> and its siblings.</param>
        private void Visualize(LcrsTrie node, StringBuilder output, int depth)
        {
            // Siblings share a depth, so walk them in a loop and recurse only
            // into the children.
            for (var current = node; current != null; current = current.RightSibling)
            {
                output.Append('\t', depth)
                      .Append(current.Value.ToString())
                      .Append(" ")
                      .AppendLine();

                Visualize(current.LeftChild, output, depth + 1);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Adds every word stored in <paramref name="other"/>, together with the
        /// postings of its end-of-word node, to this trie.
        /// </summary>
        /// <param name="other">The trie to merge in. An empty trie is a no-op.</param>
        /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
        public void Merge(LcrsTrie other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            var first = other.LeftChild;

            if (first == null)
            {
                // Nothing to merge; previously this dereferenced a null child.
                return;
            }

            var words = new List<Word>();

            first.DepthFirst(string.Empty, new List<char>(), words);

            var nodes = first.EndOfWordNodes().ToArray();

            // NOTE(review): words and nodes are paired by position, which assumes
            // DepthFirst and EndOfWordNodes visit end-of-word nodes in the same
            // order and yield the same number of items - confirm against both.
            for (int index = 0; index < nodes.Length; index++)
            {
                Add(words[index].Value, nodes[index].Postings.ToArray());
            }
        }
Beispiel #12
0
        /// <summary>
        /// Returns the trie stored in <c>_tries</c> under the hash of
        /// <paramref name="key"/>, creating and registering an empty one first
        /// when none exists.
        /// </summary>
        /// <param name="key">Unhashed key; hashed with <c>ToHash()</c> before the lookup.</param>
        /// <returns>The existing or newly created trie.</returns>
        private LcrsTrie GetTree(string key)
        {
            var hashedKey = key.ToHash();

            LcrsTrie existing;

            if (_tries.TryGetValue(hashedKey, out existing))
            {
                return existing;
            }

            var created = new LcrsTrie();

            _tries.Add(hashedKey, created);

            return created;
        }
Beispiel #13
0
        /// <summary>
        /// Writes <paramref name="trie"/>, its descendants and its right siblings to
        /// <paramref name="treeStream"/>. Each node is written before its child
        /// subtree, and the child subtree before the next sibling.
        /// </summary>
        /// <param name="trie">First node to write.</param>
        /// <param name="treeStream">Destination stream.</param>
        /// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
        private static void SerializeDepthFirst(
            this LcrsTrie trie, Stream treeStream, short depth)
        {
            var current = trie;

            // Siblings share a depth, so they are walked in a loop;
            // only the child subtrees are handled recursively.
            do
            {
                new LcrsNode(current, depth, current.Weight, current.PostingsAddress).Serialize(treeStream);

                var child = current.LeftChild;

                if (child != null)
                {
                    child.SerializeDepthFirst(treeStream, (short)(depth + 1));
                }

                current = current.RightSibling;
            }
            while (current != null);
        }
Beispiel #14
0
        /// <summary>
        /// Starts <c>_timer</c> and adds the token and posting of
        /// <paramref name="word"/> to the trie kept for the word's field. The trie is
        /// looked up by the string form of the field's hash and created on first use.
        /// </summary>
        /// <param name="word">Word carrying the field, token and posting to add.</param>
        public void Add(WordInfo word)
        {
            _timer.Start();

            var fieldKey = word.Field.ToHash().ToString();

            LcrsTrie target;
            var known = _tries.TryGetValue(fieldKey, out target);

            if (!known)
            {
                // First word seen for this field.
                target = new LcrsTrie();
                _tries.Add(fieldKey, target);
            }

            target.Add(word.Token, word.Posting);
        }
Beispiel #15
0
        /// <summary>
        /// Starts <c>_timer</c> and adds <paramref name="value"/> with its
        /// <paramref name="posting"/> to the trie stored under the hash of
        /// <paramref name="key"/>, creating that trie on first use.
        /// </summary>
        /// <param name="key">Unhashed key selecting the trie.</param>
        /// <param name="value">Word to insert, starting at character index 0.</param>
        /// <param name="posting">Posting passed along to the trie.</param>
        public void Add(string key, string value, DocumentPosting posting)
        {
            _timer.Start();

            var trieKey = key.ToHash();

            LcrsTrie target;

            if (_tries.TryGetValue(trieKey, out target) == false)
            {
                // No trie for this key yet.
                target = new LcrsTrie();
                _tries.Add(trieKey, target);
            }

            target.Add(value, 0, posting);
        }
Beispiel #16
0
        /// <summary>
        /// Writes the serialized bytes of <paramref name="trie"/>, its descendants
        /// and its right siblings to <paramref name="stream"/>. Each node is written
        /// before its child subtree, and the child subtree before the next sibling.
        /// </summary>
        /// <param name="trie">First node to write.</param>
        /// <param name="stream">Destination stream.</param>
        /// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
        private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream, short depth)
        {
            var current = trie;

            do
            {
                var record  = new LcrsNode(current, depth, current.Weight, current.PostingsAddress);
                var payload = record.Serialize();

                stream.Write(payload, 0, payload.Length);

                var child = current.LeftChild;

                if (child != null)
                {
                    // Children sit one level below the current node.
                    child.SerializeDepthFirst(stream, (short)(depth + 1));
                }

                current = current.RightSibling;
            }
            while (current != null);
        }
Beispiel #17
0
        /// <summary>
        /// Searches the direct children of this node (the left child and its chain
        /// of right siblings) for one whose value equals <paramref name="c"/>.
        /// </summary>
        /// <param name="c">Character to look for.</param>
        /// <param name="node">The matching child, or null when there is none.</param>
        /// <returns>True when a matching child was found.</returns>
        private bool TryGetChild(char c, out LcrsTrie node)
        {
            for (var candidate = LeftChild; candidate != null; candidate = candidate.RightSibling)
            {
                if (candidate.Value == c)
                {
                    node = candidate;
                    return true;
                }
            }

            node = null;
            return false;
        }
Beispiel #18
0
        /// <summary>
        /// Recursively relinks the nodes in <paramref name="arr"/> between
        /// <paramref name="start"/> and <paramref name="end"/> (inclusive): the
        /// middle element becomes the root, the lower half its <c>LeftChild</c> and
        /// the upper half its <c>RightSibling</c>. Existing links of those nodes
        /// are overwritten.
        /// </summary>
        /// <param name="arr">Nodes to relink.</param>
        /// <param name="start">Index of the first element of the range.</param>
        /// <param name="end">Index of the last element of the range.</param>
        /// <returns>The root of the relinked range, or null for an empty range.</returns>
        private LcrsTrie Balance(LcrsTrie[] arr, int start, int end)
        {
            if (start <= end)
            {
                var middle = (start + end) / 2;
                var pivot  = arr[middle];

                pivot.LeftChild    = Balance(arr, start, middle - 1);
                pivot.RightSibling = Balance(arr, middle + 1, end);

                return pivot;
            }

            return null;
        }
Beispiel #19
0
        /// <summary>
        /// Appends the children of <paramref name="trie"/> as a new segment to the
        /// file <paramref name="fileName"/> and records the segment's start offset
        /// in a companion index file with the same base name and the extension
        /// ".six".
        /// </summary>
        /// <remarks>
        /// When the tree file already exists, a segment delimiter node is written
        /// before the new segment and the recorded offset points just past it.
        /// The offset is stored as an 8-byte little-endian integer.
        /// </remarks>
        /// <param name="trie">Root whose children are written; the root itself is not.</param>
        /// <param name="fileName">Path of the tree file to create or append to.</param>
        public static void Serialize(this LcrsTrie trie, string fileName)
        {
            var dir         = Path.GetDirectoryName(fileName);
            var version     = Path.GetFileNameWithoutExtension(fileName);
            var sixFileName = Path.Combine(dir, version + ".six");

            using (var sixStream = new FileStream(sixFileName, FileMode.Append, FileAccess.Write, FileShare.Read))
            {
                // An existing file is shared for reading while the segment is
                // appended; a brand new file is opened exclusively.
                var appending = File.Exists(fileName);
                var share     = appending ? FileShare.Read : FileShare.None;

                // The tree stream is owned by a using block from the moment it is
                // created, so it is closed even if writing the delimiter or the
                // index entry throws.
                using (var treeStream = new FileStream(fileName, FileMode.Append, FileAccess.Write, share))
                {
                    if (appending)
                    {
                        var segmentDelimiter = new LcrsNode(SegmentDelimiter, false, false, false, 0, 1, null);

                        segmentDelimiter.Serialize(treeStream);
                    }

                    var posBytes = BitConverter.GetBytes(treeStream.Position);

                    if (!BitConverter.IsLittleEndian)
                    {
                        // Keep the on-disk format little-endian on every platform.
                        Array.Reverse(posBytes);
                    }

                    sixStream.Write(posBytes, 0, sizeof(long));

                    if (trie.LeftChild != null)
                    {
                        trie.LeftChild.SerializeDepthFirst(treeStream, 0);
                    }
                }
            }
        }
Beispiel #20
0
        /// <summary>
        /// Follows <paramref name="path"/> character by character, starting at the
        /// children of this node, and returns the node reached by the last character.
        /// </summary>
        /// <param name="path">Characters to follow.</param>
        /// <param name="leaf">
        /// The node matching the last character of <paramref name="path"/>, or null
        /// when the path is not present.
        /// </param>
        /// <returns>
        /// True when every character of <paramref name="path"/> was matched; false
        /// otherwise, including when <paramref name="path"/> is null or empty.
        /// </returns>
        public bool TryFindPath(char[] path, out LcrsTrie leaf)
        {
            leaf = null;

            // A null or empty path can never match. Without this guard, indexing
            // path[0] threw whenever this node had at least one child.
            if (path == null || path.Length == 0)
            {
                return false;
            }

            var node  = LeftChild;
            var index = 0;

            while (node != null)
            {
                if (!node.Value.Equals(path[index]))
                {
                    // Not the character we are looking for: try the next sibling.
                    node = node.RightSibling;
                    continue;
                }

                if (index + 1 == path.Length)
                {
                    // Destination has been reached.
                    leaf = node;
                    return true;
                }

                // Matched this character: descend and look for the next one.
                index++;
                node = node.LeftChild;
            }

            return false;
        }
Beispiel #21
0
        /// <summary>
        /// Writes <paramref name="trie"/>, its descendants and its right siblings to
        /// <paramref name="stream"/>, giving up on a branch once
        /// <paramref name="count"/> exceeds 1000 * 100.
        /// </summary>
        /// <remarks>
        /// <paramref name="count"/> is passed by value and incremented once per
        /// call, so it reflects the length of the call chain that led to the
        /// current node, not the total number of nodes written so far.
        /// </remarks>
        /// <param name="trie">First node to write.</param>
        /// <param name="stream">Destination stream.</param>
        /// <param name="depth">Depth recorded for <paramref name="trie"/> and its siblings.</param>
        /// <param name="count">Counter inherited from the calling node.</param>
        private static void SerializeDepthFirst(this LcrsTrie trie, Stream stream, short depth, int count)
        {
            const int cutOff = 1000 * 100;

            if (count > cutOff)
            {
                Log.Info("cut off trie at 1000 * 100");
                return;
            }

            // Value handed on to both the child and the sibling call.
            var next = count + 1;

            var record = new LcrsNode(trie, depth, trie.Weight, trie.PostingsAddress);

            record.Serialize(stream);

            var child = trie.LeftChild;

            if (child != null)
            {
                child.SerializeDepthFirst(stream, (short)(depth + 1), next);
            }

            var sibling = trie.RightSibling;

            if (sibling != null)
            {
                sibling.SerializeDepthFirst(stream, depth, next);
            }
        }
Beispiel #22
0
        /// <summary>
        /// Recursively relinks the nodes of <paramref name="arr"/> in the inclusive
        /// range <paramref name="start"/>..<paramref name="end"/> around the middle
        /// element: the lower half is attached as its <c>LeftChild</c>, the upper
        /// half as its <c>RightSibling</c>.
        /// </summary>
        /// <param name="arr">Nodes to relink.</param>
        /// <param name="start">Index of the first element of the range.</param>
        /// <param name="end">Index of the last element of the range.</param>
        /// <returns>The middle node of the range, or null when the range is empty.</returns>
        private LcrsTrie Balance(LcrsTrie[] arr, int start, int end)
        {
            // Known limitation: relinking nodes this way distorts the tree.
            // TODO: balance a sorted list of strings instead of a list of nodes

            if (end < start)
            {
                return null;
            }

            var midpoint    = (start + end) / 2;
            var subtreeRoot = arr[midpoint];

            var lowerBound = midpoint - 1;
            var upperBound = midpoint + 1;

            subtreeRoot.LeftChild    = Balance(arr, start, lowerBound);
            subtreeRoot.RightSibling = Balance(arr, upperBound, end);

            return subtreeRoot;
        }
Beispiel #23
0
        /// <summary>
        /// Follows <paramref name="path"/> character by character, starting at the
        /// children of this node, and returns the node reached by the last character.
        /// </summary>
        /// <param name="path">Characters to follow.</param>
        /// <param name="leaf">
        /// The node matching the last character of <paramref name="path"/>, or null
        /// when the path is not present.
        /// </param>
        /// <returns>
        /// True when every character of <paramref name="path"/> was matched; false
        /// otherwise, including when <paramref name="path"/> is null or empty.
        /// </returns>
        public bool TryFindPath(string path, out LcrsTrie leaf)
        {
            leaf = null;

            // A null or empty path can never match. Without this guard, indexing
            // path[0] threw whenever this node had at least one child.
            if (string.IsNullOrEmpty(path))
            {
                return false;
            }

            // Walk down level by level rather than recursing on path.Substring(1),
            // which allocated a new string for every character.
            var parent = this;

            for (var index = 0; index < path.Length; index++)
            {
                var child = parent.LeftChild;

                while (child != null && !child.Value.Equals(path[index]))
                {
                    child = child.RightSibling;
                }

                if (child == null)
                {
                    return false;
                }

                parent = child;
            }

            leaf = parent;
            return true;
        }
Beispiel #24
0
        /// <summary>
        /// Inserts the character at <paramref name="index"/> of <paramref name="word"/>
        /// as a child of this node and recurses for the remaining characters. At the
        /// last character the node is marked end-of-word and <paramref name="term"/>
        /// is serialized into the node's postings stream.
        /// </summary>
        /// <param name="word">The complete word being inserted.</param>
        /// <param name="index">Position in <paramref name="word"/> handled by this call.</param>
        /// <param name="term">Term serialized into the end-of-word node's postings stream.</param>
        /// <exception cref="ArgumentException"><paramref name="word"/> is null, empty or white space.</exception>
        public void Add(string word, int index, AnalyzedTerm term)
        {
            if (string.IsNullOrWhiteSpace(word))
            {
                throw new ArgumentException("word");
            }

            // Nothing left to insert.
            if (index == word.Length)
            {
                return;
            }

            var key = word[index];
            // True when this call handles the last character of the word.
            var eow = word.Length == index + 1;

            LcrsTrie node;

            if (!TryGetChild(key, out node))
            {
                node = new LcrsTrie(key, eow);

                if (LeftChild == null)
                {
                    LeftChild = node;
                }
                else
                {
                    // place new node in lexical order

                    if (LeftChild.Value > node.Value)
                    {
                        // New node sorts before the current first child: make it the head.
                        var tmp = LeftChild;
                        LeftChild         = node;
                        node.RightSibling = tmp;
                    }
                    else
                    {
                        var sibling = LeftChild;

                        // Advance to the last sibling smaller than the new node.
                        // NOTE(review): this relies on the sibling chain already being
                        // in ascending order; otherwise sibling can run off the end
                        // of the chain and be dereferenced as null - confirm.
                        while (true)
                        {
                            if (sibling.Value < node.Value && (sibling.RightSibling == null ||
                                                               sibling.RightSibling.Value > node.Value))
                            {
                                break;
                            }
                            sibling = sibling.RightSibling;
                        }
                        // Splice the new node in between sibling and its successor.
                        var rightSibling = sibling.RightSibling;
                        sibling.RightSibling = node;
                        node.RightSibling    = rightSibling;
                    }
                }
            }

            if (eow)
            {
                node.EndOfWord = true;

                // The postings stream is created on first use, in memory.
                if (node.PostingsStream == null)
                {
                    node.PostingsStream = new MemoryStream();
                }

                term.Serialize(node.PostingsStream);

                // Size is tested before this term's positions are added to it, so
                // the switch to a file happens on the first call made after Size
                // has grown past 100000.
                if (node.Size > 100000 && node.WriteToDisk == false)
                {
                    // Move the postings into a randomly named file in the current
                    // directory; the file is opened with DeleteOnClose.
                    var fn = Path.Combine(Directory.GetCurrentDirectory(), Path.GetRandomFileName());
                    var fs = new FileStream(fn, FileMode.Create, FileAccess.ReadWrite,
                                            FileShare.None, 4096, FileOptions.DeleteOnClose);
                    // Rewind so the copy starts at the beginning of the in-memory data.
                    node.PostingsStream.Position = 0;
                    node.PostingsStream.CopyTo(fs);
                    node.PostingsStream.Dispose();

                    node.PostingsStream = fs;
                    node.WriteToDisk    = true;
                }
                else if (node.WriteToDisk == false)
                {
                    // Size is only tracked while the postings are still in memory.
                    node.Size += term.Positions.Count;
                }
            }
            else
            {
                // More characters remain: continue below the node for this character.
                node.Add(word, index + 1, term);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Inserts the first character of <paramref name="word"/> as a child of this
        /// node, keeping the children in ascending order, and recurses with the rest
        /// of the word. At the last character <paramref name="postings"/> are
        /// appended to the node's postings list.
        /// </summary>
        /// <param name="word">The word, or remaining part of a word, to insert.</param>
        /// <param name="postings">Postings to attach to the end-of-word node.</param>
        /// <exception cref="ArgumentException"><paramref name="word"/> is null, empty or white space.</exception>
        public void Add(string word, params DocumentPosting[] postings)
        {
            if (string.IsNullOrWhiteSpace(word))
            {
                throw new ArgumentException("word");
            }

            var isLastChar = word.Length == 1;
            var firstChar  = word[0];

            LcrsTrie child;
            var found = TryGetChild(firstChar, out child);

            if (found)
            {
                if (isLastChar)
                {
                    // The word ends on an already existing node.
                    child.EndOfWord = true;
                    child.WordCount++;
                }
            }
            else
            {
                child = new LcrsTrie(firstChar, isLastChar);

                if (LeftChild == null)
                {
                    LeftChild = child;
                }
                else if (LeftChild.Value > child.Value)
                {
                    // Sorts before the current first child: becomes the new head.
                    child.RightSibling = LeftChild;
                    LeftChild          = child;
                }
                else
                {
                    // Advance to the last sibling that sorts before the new node.
                    var previous = LeftChild;

                    while (previous.Value >= child.Value ||
                           (previous.RightSibling != null && previous.RightSibling.Value <= child.Value))
                    {
                        previous = previous.RightSibling;
                    }

                    child.RightSibling    = previous.RightSibling;
                    previous.RightSibling = child;
                }
            }

            if (!isLastChar)
            {
                child.Add(word.Substring(1), postings);
                return;
            }

            if (child.Postings == null)
            {
                child.Postings = new List<DocumentPosting>();
            }

            for (var i = 0; i < postings.Length; i++)
            {
                child.Postings.Add(postings[i]);
            }
        }