Пример #1
0
        /// <summary>
        /// Writes one fixed-size record to <paramref name="stream"/> for every position
        /// of <paramref name="term"/>.
        /// </summary>
        /// <remarks>
        /// Each record is the term's document id followed by the position. Both values are
        /// written as <c>sizeof(int)</c> bytes in little-endian order, independent of the
        /// byte order of the host.
        /// </remarks>
        /// <param name="term">The term whose document id and positions are written.</param>
        /// <param name="stream">The destination stream; written to at its current position.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static void Serialize(this AnalyzedTerm term, Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // BitConverter emits host byte order; flip on big-endian hosts so the
            // on-stream format is always little-endian.
            bool mustReverse = !BitConverter.IsLittleEndian;

            // The document id is identical for every record, so encode it only once.
            byte[] idBytes = BitConverter.GetBytes(term.DocumentId);

            if (mustReverse)
            {
                Array.Reverse(idBytes);
            }

            foreach (var position in term.Positions)
            {
                byte[] dataBytes = BitConverter.GetBytes(position);

                if (mustReverse)
                {
                    Array.Reverse(dataBytes);
                }

                stream.Write(idBytes, 0, sizeof(int));
                stream.Write(dataBytes, 0, sizeof(int));
            }
        }
Пример #2
0
        /// <summary>
        /// Inserts the part of <paramref name="word"/> that starts at <paramref name="index"/>
        /// below this node, creating child nodes where none exists yet, and appends the
        /// serialized <paramref name="term"/> to the postings stream of the node that ends the word.
        /// </summary>
        /// <param name="word">The word to insert. Must not be null, empty or whitespace.</param>
        /// <param name="index">
        /// Index into <paramref name="word"/> of the character handled by this call.
        /// When it equals the word's length the call does nothing.
        /// </param>
        /// <param name="term">The term that is serialized into the end-of-word node's postings stream.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="word"/> is null, empty or consists only of whitespace.
        /// </exception>
        public void Add(string word, int index, AnalyzedTerm term)
        {
            if (string.IsNullOrWhiteSpace(word))
            {
                throw new ArgumentException("Word must not be null, empty or whitespace.", nameof(word));
            }

            if (index == word.Length)
            {
                return;
            }

            var key = word[index];
            var eow = word.Length == index + 1;

            LcrsTrie node;

            if (!TryGetChild(key, out node))
            {
                node = new LcrsTrie(key, eow);

                // Keep the sibling list in lexical order.
                if (LeftChild == null || LeftChild.Value > node.Value)
                {
                    // New node becomes the head of the sibling list.
                    node.RightSibling = LeftChild;
                    LeftChild         = node;
                }
                else
                {
                    // Advance to the last sibling that sorts before the new node. The walk
                    // is bounded by the end of the list, so it can never step past it.
                    var sibling = LeftChild;

                    while (sibling.RightSibling != null && sibling.RightSibling.Value < node.Value)
                    {
                        sibling = sibling.RightSibling;
                    }

                    node.RightSibling    = sibling.RightSibling;
                    sibling.RightSibling = node;
                }
            }

            if (!eow)
            {
                // More characters remain: continue one level down.
                node.Add(word, index + 1, term);
                return;
            }

            node.EndOfWord = true;

            if (node.PostingsStream == null)
            {
                node.PostingsStream = new MemoryStream();
            }

            term.Serialize(node.PostingsStream);

            if (node.WriteToDisk == false)
            {
                // Size is a running count of positions added while the postings are
                // still held in memory; past this count they are moved to a temp file.
                const int maxPositionsInMemory = 100000;

                if (node.Size > maxPositionsInMemory)
                {
                    var fn = Path.Combine(Directory.GetCurrentDirectory(), Path.GetRandomFileName());
                    var fs = new FileStream(fn, FileMode.Create, FileAccess.ReadWrite,
                                            FileShare.None, 4096, FileOptions.DeleteOnClose);

                    try
                    {
                        node.PostingsStream.Position = 0;
                        node.PostingsStream.CopyTo(fs);
                    }
                    catch
                    {
                        // Do not leak the file handle (or the temp file, which is
                        // deleted on close) when the copy fails.
                        fs.Dispose();
                        throw;
                    }

                    node.PostingsStream.Dispose();

                    node.PostingsStream = fs;
                    node.WriteToDisk    = true;
                }
                else
                {
                    node.Size += term.Positions.Count;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Adds <paramref name="value"/> together with <paramref name="term"/> to the tree
        /// returned by <c>GetTree</c> for <paramref name="key"/>.
        /// </summary>
        /// <param name="key">Selects the tree to add to.</param>
        /// <param name="value">The word to insert, starting from its first character.</param>
        /// <param name="term">The term passed along with the inserted word.</param>
        public void Add(string key, string value, AnalyzedTerm term)
        {
            // Insertion always starts at the first character of the value.
            GetTree(key).Add(value, 0, term);
        }