예제 #1
0
 /// <summary>
 /// Appends the postings offsets of <paramref name="source"/> to those of <paramref name="target"/>.
 /// </summary>
 /// <param name="target">Node that receives the offsets. Its PostingsOffsets is cast to <c>List&lt;long&gt;</c>.</param>
 /// <param name="source">Node whose offsets are copied. Nothing happens when its PostingsOffsets is null.</param>
 public static void MergePostings(this VectorNode target, VectorNode source)
 {
     var incoming = source.PostingsOffsets;

     if (incoming == null)
     {
         return;
     }

     var existing = (List<long>)target.PostingsOffsets;

     existing.AddRange(incoming);
 }
예제 #2
0
 /// <summary>
 /// Adds every document id held by <paramref name="node"/> to the document ids of <paramref name="target"/>.
 /// </summary>
 /// <param name="target">Node whose DocIds collection receives the ids.</param>
 /// <param name="node">Node whose DocIds are enumerated and copied one by one.</param>
 public static void AddDocId(VectorNode target, VectorNode node)
 {
     foreach (var id in node.DocIds)
     {
         // target.DocIds is resolved per element, exactly as many times as there are ids to copy.
         target.DocIds.Add(id);
     }
 }
예제 #3
0
        /// <summary>
        /// Writes the index record of <paramref name="node"/> to <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// The record consists of five 64-bit values, in this order: vector offset, postings offset,
        /// component count of the node's vector, weight, and a terminator that encodes which
        /// children exist (0 = left and right, 1 = left only, 2 = right only, 3 = none).
        /// </remarks>
        /// <param name="node">Node to write.</param>
        /// <param name="stream">Destination stream.</param>
        public static void Serialize(this VectorNode node, Stream stream)
        {
            var hasLeft  = node.Left != null;
            var hasRight = node.Right != null;

            long terminator;

            if (hasLeft)
            {
                terminator = hasRight ? 0 : 1;
            }
            else
            {
                terminator = hasRight ? 2 : 3;
            }

            Span<long> record = stackalloc long[5];

            record[0] = node.VectorOffset;
            record[1] = node.PostingsOffset;
            record[2] = node.Vector.ComponentCount;
            record[3] = node.Weight;
            record[4] = terminator;

            // Reinterpret the five longs as raw bytes and write them in one call.
            stream.Write(MemoryMarshal.Cast<long, byte>(record));
        }
예제 #4
0
 /// <summary>
 /// Appends the document ids of <paramref name="source"/> to those of <paramref name="target"/>.
 /// </summary>
 /// <param name="target">Node whose DocIds receives the ids.</param>
 /// <param name="source">Node whose DocIds are copied. Nothing happens when they are null.</param>
 public static void MergeDocIds(this VectorNode target, VectorNode source)
 {
     if (source.DocIds == null)
     {
         return;
     }

     target.DocIds.AddRange(source.DocIds);
 }
예제 #5
0
        /// <summary>
        /// Assigns consecutive ids, starting at 0, to the PostingsOffset of every node in the tree.
        /// </summary>
        /// <remarks>
        /// Nodes are visited node first, then its left subtree, then its right subtree.
        /// When <paramref name="root"/> has a ComponentCount of 0 the walk starts at its right child instead.
        /// </remarks>
        /// <param name="root">Root of the tree to number.</param>
        public static void SetIdsOnAllNodes(VectorNode root)
        {
            var  start   = root.ComponentCount == 0 ? root.Right : root;
            var  pending = new Stack<VectorNode>();
            long nextId  = 0;

            if (start != null)
            {
                pending.Push(start);
            }

            while (pending.Count > 0)
            {
                // Walk down the left chain, remembering each right child for later.
                for (var current = pending.Pop(); current != null; current = current.Left)
                {
                    current.PostingsOffset = nextId++;

                    if (current.Right != null)
                    {
                        pending.Push(current.Right);
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Lazily enumerates every node of the tree: a node first, then its left subtree, then its right subtree.
        /// </summary>
        /// <param name="root">
        /// Root of the tree. When its ComponentCount is 0 it is skipped and enumeration starts at its right child.
        /// </param>
        /// <returns>The nodes in visiting order.</returns>
        public static IEnumerable<VectorNode> All(VectorNode root)
        {
            var pending = new Stack<VectorNode>();
            var current = root.ComponentCount == 0 ? root.Right : root;

            while (current != null)
            {
                yield return current;

                // Children are read only after the consumer resumes the iterator.
                if (current.Right != null)
                {
                    pending.Push(current.Right);
                }

                // Prefer the left child; otherwise continue with the most recently deferred right child.
                current = current.Left ?? (pending.Count > 0 ? pending.Pop() : null);
            }
        }
예제 #7
0
        /// <summary>
        /// Renders the tree rooted at <paramref name="root"/> as text.
        /// </summary>
        /// <param name="root">Root of the tree to render.</param>
        /// <returns>The text produced by the recursive Visualize overload, starting at depth 0.</returns>
        public static string Visualize(VectorNode root)
        {
            var text = new StringBuilder();

            Visualize(root, text, 0);

            return text.ToString();
        }
예제 #8
0
        /// <summary>
        /// Walks the tree from <paramref name="root"/> and returns the visited node with the highest score against <paramref name="vector"/>.
        /// </summary>
        /// <remarks>
        /// At each node the score is <c>model.CosAngle</c> between the query and the node's vector (0 when the node has no vector).
        /// The walk continues left when the score exceeds <c>model.FoldAngle</c> and right otherwise.
        /// </remarks>
        /// <param name="root">Node where the walk starts. It is also the result when no node scores above 0.</param>
        /// <param name="vector">Query vector.</param>
        /// <param name="model">Supplies the scoring function and the fold threshold.</param>
        /// <returns>A <c>Hit</c> holding the best node and its score.</returns>
        public static Hit ClosestMatch(VectorNode root, IVector vector, IModel model)
        {
            double highscore = 0;
            var    best      = root;
            var    cursor    = root;

            while (cursor != null)
            {
                var angle  = cursor.Vector == null ? 0 : model.CosAngle(vector, cursor.Vector);
                var goLeft = angle > model.FoldAngle;

                // The best-so-far update is the same regardless of the direction taken.
                if (angle > highscore)
                {
                    highscore = angle;
                    best      = cursor;
                }

                cursor = goLeft ? cursor.Left : cursor.Right;
            }

            return new Hit(best, highscore);
        }
예제 #9
0
        /// <summary>
        /// Builds a matrix with one one-hot row per visited node.
        /// </summary>
        /// <remarks>
        /// The matrix has <c>root.Weight</c> rows, each of length <c>root.Weight</c>.
        /// The i-th visited node gets a row whose only non-zero entry is a 1 at column i.
        /// Nodes are visited node first, then left subtree, then right subtree.
        /// Rows for positions never reached stay null.
        /// </remarks>
        /// <param name="root">Root of the tree. When it has no vector the walk starts at its right child.</param>
        /// <returns>The matrix described above.</returns>
        public static float[][] AsOneHotMatrix(VectorNode root)
        {
            var current = root.Vector == null ? root.Right : root;
            var pending = new Stack<VectorNode>();
            var rows    = new float[root.Weight][];
            var row     = 0;

            while (current != null)
            {
                var oneHot = new float[root.Weight];

                oneHot[row] = 1;
                rows[row]   = oneHot;
                row++;

                if (current.Right != null)
                {
                    pending.Push(current.Right);
                }

                // Prefer the left child; otherwise resume with the most recently deferred right child.
                current = current.Left ?? (pending.Count > 0 ? pending.Pop() : null);
            }

            return rows;
        }
예제 #10
0
        /// <summary>
        /// Serializes <paramref name="column"/> as one page and records where that page was written.
        /// </summary>
        /// <param name="column">Tree to serialize.</param>
        /// <param name="vectorStream">Stream passed to SerializeTree for vectors.</param>
        /// <param name="postingsStream">Stream passed to SerializeTree for postings.</param>
        /// <param name="pageIndexWriter">Receives the offset and length of the written page.</param>
        /// <returns>The result of <c>PathFinder.Size</c> for <paramref name="column"/>.</returns>
        public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, Stream postingsStream, PageIndexWriter pageIndexWriter)
        {
            // The tree itself goes to this instance's index stream.
            var written = column.SerializeTree(_ixStream, vectorStream, postingsStream);

            pageIndexWriter.Put(written.offset, written.length);

            return PathFinder.Size(column);
        }
예제 #11
0
        /// <summary>
        /// Serializes <paramref name="column"/> as one page, without a postings stream, and records where that page was written.
        /// </summary>
        /// <param name="column">Tree to serialize.</param>
        /// <param name="vectorStream">Stream passed to <c>GraphBuilder.SerializeTree</c> for vectors.</param>
        /// <param name="pageIndexWriter">Receives the offset and length of the written page.</param>
        /// <returns>The result of <c>PathFinder.Size</c> for <paramref name="column"/>.</returns>
        public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, PageIndexWriter pageIndexWriter)
        {
            // The tree itself goes to this instance's index stream; null is passed for the postings stream.
            var written = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, null);

            pageIndexWriter.Put(written.offset, written.length);

            return PathFinder.Size(column);
        }
예제 #12
0
        /// <summary>
        /// Finds the node that matches <paramref name="node"/> or attaches <paramref name="node"/> as a new leaf,
        /// taking a lock on the prospective parent before attaching.
        /// </summary>
        /// <param name="root">Node where the walk starts; its Weight is used as the postings offset of a newly attached node.</param>
        /// <param name="node">Node to place in the tree.</param>
        /// <param name="model">Supplies <c>CosAngle</c>, the score between two vectors.</param>
        /// <param name="foldAngle">Scores above this value continue to the left child, others to the right child.</param>
        /// <param name="identicalAngle">Scores at or above this value are treated as a match with the current node.</param>
        /// <returns>
        /// The PostingsOffset of the matching node, or the PostingsOffset assigned to <paramref name="node"/> when it was attached.
        /// </returns>
        public static long AppendSynchronized(
            VectorNode root,
            VectorNode node,
            IDistanceCalculator model,
            double foldAngle,
            double identicalAngle)
        {
            var cursor = root;

            while (true)
            {
                // A node without a vector scores 0.
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle >= identicalAngle)
                {
                    // Match: nothing is attached, the existing node's offset is returned.
                    return(cursor.PostingsOffset);
                }
                else if (angle > foldAngle)
                {
                    if (cursor.Left == null)
                    {
                        lock (cursor)
                        {
                            // Re-check under the lock; the slot may have been filled since the check above.
                            if (cursor.Left == null)
                            {
                                node.PostingsOffset = root.Weight;
                                cursor.Left         = node;
                                return(node.PostingsOffset);
                            }
                        }
                        // Slot was taken in the meantime: the next iteration re-evaluates the same cursor
                        // and descends into the now non-null left child.
                    }
                    else
                    {
                        cursor = cursor.Left;
                    }
                }
                else
                {
                    if (cursor.Right == null)
                    {
                        lock (cursor)
                        {
                            // Re-check under the lock; the slot may have been filled since the check above.
                            if (cursor.Right == null)
                            {
                                node.PostingsOffset = root.Weight;
                                cursor.Right        = node;
                                return(node.PostingsOffset);
                            }
                        }
                        // Slot was taken in the meantime: the next iteration re-evaluates the same cursor
                        // and descends into the now non-null right child.
                    }
                    else
                    {
                        cursor = cursor.Right;
                    }
                }
            }
        }
예제 #13
0
 /// <summary>
 /// Appends the document ids of <paramref name="source"/> to those of <paramref name="target"/> while holding the lock on <c>target.Sync</c>.
 /// </summary>
 /// <param name="target">Node whose DocIds receives the ids and whose Sync object is locked.</param>
 /// <param name="source">Node whose DocIds are copied. Nothing is added when they are null.</param>
 public static void MergeDocIdsConcurrent(this VectorNode target, VectorNode source)
 {
     lock (target.Sync)
     {
         // The null check stays inside the lock, so the lock is always taken.
         if (source.DocIds == null)
         {
             return;
         }

         target.DocIds.AddRange(source.DocIds);
     }
 }
예제 #14
0
        /// <summary>
        /// Lazily enumerates the chain of left children below <paramref name="root"/>.
        /// </summary>
        /// <param name="root">Node whose left descendants are returned. The node itself is not included.</param>
        /// <returns>The left child, then that child's left child, and so on until a null link is reached.</returns>
        public static IEnumerable<VectorNode> LeftList(VectorNode root)
        {
            for (var current = root.Left; current != null; current = current.Left)
            {
                yield return current;
            }
        }
예제 #15
0
        /// <summary>
        /// Attaches <paramref name="node"/> at the end of the chain of right children that starts at <paramref name="parent"/>.
        /// </summary>
        /// <remarks>
        /// The last node of the chain has no right child, so <c>node.Right</c> ends up null:
        /// any right child <paramref name="node"/> had before the call is replaced.
        /// </remarks>
        /// <param name="parent">Node where the search for the end of the right chain begins.</param>
        /// <param name="node">Node to append.</param>
        public static void AddRight(VectorNode parent, VectorNode node)
        {
            var tail = parent;

            // Advance to the last node of the right chain.
            for (var next = tail.Right; next != null; next = tail.Right)
            {
                tail = next;
            }

            node.Right = tail.Right;
            tail.Right = node;
        }
예제 #16
0
        /// <summary>
        /// Counts the nodes on the chain of left children below <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node whose left chain is measured. The node itself is not counted.</param>
        /// <returns>The number of consecutive left descendants; 0 when there is no left child.</returns>
        public static int Depth(VectorNode node)
        {
            var levels = 0;

            for (var current = node.Left; current != null; current = current.Left)
            {
                levels++;
            }

            return levels;
        }
예제 #17
0
        /// <summary>
        /// Builds a tree from <paramref name="data"/> using supervised merging, retrying vectors that could not be placed.
        /// </summary>
        /// <remarks>
        /// Every vector produced by <c>model.Tokenize</c> is first offered to TryMergeOrAddSupervised; rejected nodes are queued.
        /// The queue is then re-processed, and rejected nodes are put back at its end.
        /// After every <c>batchSize</c> attempts (the queue length before retrying started) the queue length is compared
        /// with the length at the previous checkpoint, and retrying stops when it has not changed.
        /// Whatever is still queued is finally inserted with MergeOrAdd.
        /// </remarks>
        /// <param name="model">Tokenizer and similarity model.</param>
        /// <param name="data">Items to train on.</param>
        /// <returns>The root of the resulting tree.</returns>
        public static VectorNode Train<T>(IModel<T> model, params T[] data)
        {
            var tree    = new VectorNode();
            var pending = new Queue<VectorNode>();

            foreach (var item in data)
            {
                foreach (var vector in model.Tokenize(item))
                {
                    if (!TryMergeOrAddSupervised(tree, new VectorNode(vector), model, out var rejected))
                    {
                        pending.Enqueue(rejected);
                    }
                }
            }

            var batchSize     = pending.Count;
            var attempts      = 0;
            var previousCount = 0;

            while (pending.Count > 0)
            {
                if (!TryMergeOrAddSupervised(tree, pending.Dequeue(), model, out var rejected))
                {
                    pending.Enqueue(rejected);
                }

                attempts++;

                // Only look at progress on batch boundaries.
                if (attempts % batchSize != 0)
                {
                    continue;
                }

                if (previousCount == pending.Count)
                {
                    break;
                }

                previousCount = pending.Count;
            }

            // Anything still unplaced is inserted without label supervision.
            foreach (var leftover in pending)
            {
                MergeOrAdd(tree, leftover, model);
            }

            return tree;
        }
예제 #18
0
        /// <summary>
        /// Appends a one-line-per-node rendering of the tree rooted at <paramref name="node"/> to <paramref name="output"/>.
        /// </summary>
        /// <remarks>
        /// Each line is indented with <paramref name="depth"/> tab characters. A left child is written one level
        /// deeper than its parent; a right child is written at the same level as its parent.
        /// </remarks>
        /// <param name="node">Subtree to render; null renders nothing.</param>
        /// <param name="output">Builder that receives the text.</param>
        /// <param name="depth">Indentation level of <paramref name="node"/>.</param>
        private static void Visualize(VectorNode node, StringBuilder output, int depth)
        {
            // Right links are followed in a loop rather than by recursion, so a long chain of
            // right children does not deepen the call stack. The output order is unchanged.
            for (var current = node; current != null; current = current.Right)
            {
                output.Append('\t', depth);

                // Append, not AppendFormat: the text is already interpolated and must not be parsed as a
                // composite format string (a '{' or '}' in the node's text would throw FormatException).
                output.Append($"{current} w:{current.Weight}");
                output.AppendLine();

                Visualize(current.Left, output, depth + 1);
            }
        }
예제 #19
0
        /// <summary>
        /// Builds a tree by inserting every vector that <paramref name="model"/> tokenizes out of <paramref name="data"/>.
        /// </summary>
        /// <param name="model">Tokenizer; it is also passed to MergeOrAdd for each insertion.</param>
        /// <param name="data">Items to tokenize.</param>
        /// <returns>The root of the new tree.</returns>
        public static VectorNode CreateTree<T>(IModel<T> model, params T[] data)
        {
            var tree = new VectorNode();

            foreach (var document in data)
            {
                var vectors = model.Tokenize(document);

                foreach (var vector in vectors)
                {
                    var leaf = new VectorNode(vector);

                    MergeOrAdd(tree, leaf, model);
                }
            }

            return tree;
        }
예제 #20
0
        /// <summary>
        /// Creates a node from its stored field values, loading its vector from <paramref name="vectorStream"/>.
        /// </summary>
        /// <param name="vecOffset">Offset passed to <c>VectorOperations.DeserializeVector</c> and stored on the node.</param>
        /// <param name="postingsOffset">Postings offset stored on the node.</param>
        /// <param name="componentCount">Component count of the vector; cast to int before use.</param>
        /// <param name="weight">Weight stored on the node.</param>
        /// <param name="terminator">Terminator value stored on the node.</param>
        /// <param name="vectorStream">Stream the vector is read from.</param>
        /// <param name="model">Supplies <c>VectorWidth</c> for the vector deserialization.</param>
        /// <returns>The reconstructed node.</returns>
        public static VectorNode DeserializeNode(
            long vecOffset,
            long postingsOffset,
            long componentCount,
            long weight,
            long terminator,
            Stream vectorStream,
            IVectorSpaceConfig model)
        {
            return new VectorNode(
                postingsOffset,
                vecOffset,
                terminator,
                weight,
                VectorOperations.DeserializeVector(vecOffset, (int)componentCount, model.VectorWidth, vectorStream));
        }
예제 #21
0
        /// <summary>
        /// Creates a node from its stored field values, loading its vector from <paramref name="vectorStream"/>.
        /// </summary>
        /// <param name="vecOffset">Offset passed to <c>VectorOperations.DeserializeVector</c> and stored on the node.</param>
        /// <param name="postingsOffset">Postings offset stored on the node.</param>
        /// <param name="componentCount">Component count of the vector; cast to int before use.</param>
        /// <param name="weight">Weight stored on the node.</param>
        /// <param name="terminator">Terminator value stored on the node.</param>
        /// <param name="vectorStream">Stream the vector is read from.</param>
        /// <param name="model">Supplies <c>NumOfDimensions</c> for the vector deserialization.</param>
        /// <returns>The reconstructed node.</returns>
        public static VectorNode DeserializeNode(
            long vecOffset,
            long postingsOffset,
            long componentCount,
            long weight,
            long terminator,
            Stream vectorStream,
            IDistanceCalculator model)
        {
            return new VectorNode(
                postingsOffset,
                vecOffset,
                terminator,
                weight,
                VectorOperations.DeserializeVector(vecOffset, (int)componentCount, model.NumOfDimensions, vectorStream));
        }
예제 #22
0
        /// <summary>
        /// Builds a tree by handing every vector that <paramref name="model"/> tokenizes out of <paramref name="data"/>
        /// to <paramref name="indexingStrategy"/>.
        /// </summary>
        /// <param name="model">Tokenizer that turns each item into vectors.</param>
        /// <param name="indexingStrategy">Its <c>ExecutePut</c> is called once per vector to place a new node under the root.</param>
        /// <param name="data">Items to tokenize.</param>
        /// <returns>The root of the new tree.</returns>
        public static VectorNode CreateTree<T>(this IModel<T> model, IIndexingStrategy indexingStrategy, params T[] data)
        {
            var tree = new VectorNode();

            foreach (var document in data)
            {
                var vectors = model.Tokenize(document);

                foreach (var vector in vectors)
                {
                    var leaf = new VectorNode(vector);

                    indexingStrategy.ExecutePut<T>(tree, leaf);
                }
            }

            return tree;
        }
예제 #23
0
        /// <summary>
        /// Locates the node that matches <paramref name="node"/>, or attaches <paramref name="node"/> as a new leaf.
        /// </summary>
        /// <remarks>
        /// At each step the score is <c>model.CosAngle</c> between the two vectors (0 when the current node has no vector).
        /// A score at or above <c>model.IdenticalAngle</c> is a match; a score above <c>model.FoldAngle</c> goes left;
        /// anything else goes right. The first empty slot in the chosen direction receives <paramref name="node"/>.
        /// </remarks>
        /// <param name="root">Node where the walk starts.</param>
        /// <param name="node">Node to place.</param>
        /// <param name="model">Scoring function and thresholds.</param>
        /// <param name="parent">The matching node, or the node that <paramref name="node"/> was attached to.</param>
        public static void MergeOrAdd(
            VectorNode root,
            VectorNode node,
            IModel model,
            out VectorNode parent)
        {
            var cursor = root;

            while (true)
            {
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle >= model.IdenticalAngle)
                {
                    parent = cursor;
                    return;
                }

                if (angle > model.FoldAngle)
                {
                    if (cursor.Left == null)
                    {
                        cursor.Left = node;
                        parent      = cursor;
                        return;
                    }

                    cursor = cursor.Left;
                }
                else
                {
                    if (cursor.Right == null)
                    {
                        cursor.Right = node;
                        parent       = cursor;
                        return;
                    }

                    cursor = cursor.Right;
                }
            }
        }
예제 #24
0
        /// <summary>
        /// Looks for a node that matches <paramref name="node"/>; when there is none, attaches <paramref name="node"/> as a new leaf.
        /// </summary>
        /// <param name="root">Node where the walk starts.</param>
        /// <param name="node">Node to place.</param>
        /// <param name="model">Supplies <c>CosAngle</c>, the score between two vectors (a node without a vector scores 0).</param>
        /// <param name="foldAngle">Scores above this value continue left, others continue right.</param>
        /// <param name="identicalAngle">Scores at or above this value count as a match.</param>
        /// <param name="parent">The matching node, or the node that <paramref name="node"/> was attached to.</param>
        /// <returns>True when a matching node was found; false when <paramref name="node"/> was attached instead.</returns>
        public static bool TryMerge(
            VectorNode root,
            VectorNode node,
            IDistanceCalculator model,
            double foldAngle,
            double identicalAngle,
            out VectorNode parent)
        {
            var cursor = root;

            while (true)
            {
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle >= identicalAngle)
                {
                    parent = cursor;
                    return true;
                }

                if (angle > foldAngle)
                {
                    if (cursor.Left == null)
                    {
                        cursor.Left = node;
                        parent      = cursor;
                        return false;
                    }

                    cursor = cursor.Left;
                }
                else
                {
                    if (cursor.Right == null)
                    {
                        cursor.Right = node;
                        parent       = cursor;
                        return false;
                    }

                    cursor = cursor.Right;
                }
            }
        }
예제 #25
0
        /// <summary>
        /// Merges <paramref name="node"/> into a matching node with the same label, or attaches it as a new leaf.
        /// </summary>
        /// <remarks>
        /// A score at or above <c>model.IdenticalAngle</c> is a match, and the document ids are merged into the matching node.
        /// A score above <c>model.FoldAngle</c> continues left; anything else continues right.
        /// </remarks>
        /// <param name="root">Node where the walk starts.</param>
        /// <param name="node">Node to place.</param>
        /// <param name="model">Scoring function and thresholds.</param>
        /// <exception cref="InvalidOperationException">A matching node was found but its vector label differs from the label of <paramref name="node"/>.</exception>
        public static void MergeOrAddSupervised(
            this VectorNode root,
            VectorNode node,
            IModel model)
        {
            var cursor = root;

            while (true)
            {
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle >= model.IdenticalAngle)
                {
                    var sameLabel = cursor.Vector.Label.Equals(node.Vector.Label);

                    if (!sameLabel)
                    {
                        throw new InvalidOperationException($"IdenticalAngle {model.IdenticalAngle} is too low. Angle was {angle}");
                    }

                    MergeDocIds(cursor, node);
                    return;
                }

                if (angle > model.FoldAngle)
                {
                    if (cursor.Left == null)
                    {
                        cursor.Left = node;
                        return;
                    }

                    cursor = cursor.Left;
                }
                else
                {
                    if (cursor.Right == null)
                    {
                        cursor.Right = node;
                        return;
                    }

                    cursor = cursor.Right;
                }
            }
        }
예제 #26
0
        /// <summary>
        /// Appends a single line describing <paramref name="node"/> to <paramref name="output"/>.
        /// </summary>
        /// <remarks>
        /// The line is indented with <paramref name="depth"/> tab characters and contains the node's text and weight.
        /// For a node whose <c>IsRoot</c> is true, the result of <c>Size(node)</c> is appended as well.
        /// </remarks>
        /// <param name="node">Node to describe; null writes nothing.</param>
        /// <param name="depth">Number of leading tab characters.</param>
        /// <param name="output">Builder that receives the text.</param>
        private static void Visualize(VectorNode node, int depth, StringBuilder output)
        {
            if (node == null)
            {
                return;
            }

            output.Append('\t', depth);

            // Append, not AppendFormat: the text is already interpolated and must not be parsed as a
            // composite format string (a '{' or '}' in the node's text would throw FormatException).
            output.Append($"{node} w:{node.Weight} ");

            if (node.IsRoot)
            {
                output.Append($"{Size(node)}");
            }

            output.AppendLine();
        }
예제 #27
0
        /// <summary>
        /// Attaches <paramref name="node"/> as a new leaf unless a matching node already exists.
        /// </summary>
        /// <remarks>
        /// A score at or above <c>model.IdenticalAngle</c> is a match. A score above <c>model.FoldAngle</c> continues left;
        /// anything else continues right. A node without a vector scores 0.
        /// </remarks>
        /// <param name="root">Node where the walk starts.</param>
        /// <param name="node">Node to add.</param>
        /// <param name="model">Scoring function and thresholds.</param>
        /// <returns>True when <paramref name="node"/> was attached; false when a matching node was found and nothing changed.</returns>
        public static bool TryAdd(
            this VectorNode root,
            VectorNode node,
            IModel model)
        {
            var cursor = root;

            while (true)
            {
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle >= model.IdenticalAngle)
                {
                    return false;
                }

                if (angle > model.FoldAngle)
                {
                    if (cursor.Left == null)
                    {
                        cursor.Left = node;
                        return true;
                    }

                    cursor = cursor.Left;
                }
                else
                {
                    if (cursor.Right == null)
                    {
                        cursor.Right = node;
                        return true;
                    }

                    cursor = cursor.Right;
                }
            }
        }
예제 #28
0
        /// <summary>
        /// Persists a tree to the given streams.
        /// </summary>
        /// <remarks>
        /// Nodes are written node first, then left subtree, then right subtree. When <paramref name="node"/> has a
        /// ComponentCount of 0 it is skipped and writing starts at its right child. Each written node gets its
        /// VectorOffset set to the value returned by <c>VectorOperations.SerializeVector</c>.
        /// </remarks>
        /// <param name="node">Tree to persist.</param>
        /// <param name="indexStream">Stream to persist the tree into.</param>
        /// <param name="vectorStream">Stream to persist vectors in.</param>
        /// <param name="postingsStream">
        /// Optional stream to persist posting references into. Only used for nodes whose PostingsOffset is -1.
        /// </param>
        /// <returns>
        /// The position of <paramref name="indexStream"/> before writing, and the number of bytes accounted for
        /// (<c>VectorNode.BlockSize</c> per written node).
        /// </returns>
        public static (long offset, long length) SerializeTree(
            VectorNode node,
            Stream indexStream,
            Stream vectorStream,
            Stream postingsStream = null)
        {
            var stack  = new Stack<VectorNode>();
            var offset = indexStream.Position;

            // Accumulate in a long: the return type is long, and an int counter would silently
            // overflow once the page grows beyond int.MaxValue bytes.
            long length = 0;

            if (node.ComponentCount == 0)
            {
                node = node.Right;
            }

            while (node != null)
            {
                if (node.PostingsOffset == -1 && postingsStream != null)
                {
                    SerializePostings(node, postingsStream);
                }

                // The vector must be written first so that the node record carries its offset.
                node.VectorOffset = VectorOperations.SerializeVector(node.Vector, vectorStream);

                SerializeNode(node, indexStream);

                length += VectorNode.BlockSize;

                if (node.Right != null)
                {
                    stack.Push(node.Right);
                }

                node = node.Left;

                if (node == null && stack.Count > 0)
                {
                    node = stack.Pop();
                }
            }

            return (offset, length);
        }
예제 #29
0
        /// <summary>
        /// Tries to place <paramref name="node"/> by walking the chain of right children from <paramref name="root"/>.
        /// </summary>
        /// <remarks>
        /// At the first node whose score exceeds <c>model.FoldAngle</c> the labels are compared. With equal labels the
        /// document ids are added to that node and its vector is averaged with the new one. With different labels
        /// the node is rejected. When no such node exists, <paramref name="node"/> is attached at the end of the right chain.
        /// </remarks>
        /// <param name="root">Node where the walk starts.</param>
        /// <param name="node">Node to place.</param>
        /// <param name="model">Scoring function and fold threshold.</param>
        /// <param name="unclassified"><paramref name="node"/> when it was rejected; otherwise null.</param>
        /// <returns>True when <paramref name="node"/> was merged or attached; false when it was rejected.</returns>
        public static bool TryMergeOrAddSupervised(
            VectorNode root,
            VectorNode node,
            IModel model,
            out VectorNode unclassified)
        {
            var cursor = root;

            while (true)
            {
                var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

                if (angle > model.FoldAngle)
                {
                    var sameLabel = node.Vector.Label.Equals(cursor.Vector.Label);

                    if (!sameLabel)
                    {
                        unclassified = node;
                        return false;
                    }

                    AddDocId(cursor, node);
                    cursor.Vector.Average(node.Vector);
                    unclassified = null;
                    return true;
                }

                if (cursor.Right == null)
                {
                    cursor.Right = node;
                    unclassified = null;
                    return true;
                }

                cursor = cursor.Right;
            }
        }
예제 #30
0
        /// <summary>
        /// Rebuilds a tree from node records stored in <paramref name="indexStream"/>.
        /// </summary>
        /// <remarks>
        /// Records of <c>VectorNode.BlockSize</c> bytes are read until <paramref name="indexLength"/> bytes have been
        /// consumed. Each record is turned into a node by DeserializeNode and attached according to its terminator.
        /// </remarks>
        /// <param name="indexStream">Stream positioned at the first node record.</param>
        /// <param name="vectorStream">Stream passed to DeserializeNode for loading vectors.</param>
        /// <param name="indexLength">Number of bytes of node records to read.</param>
        /// <param name="model">Model passed to DeserializeNode.</param>
        /// <returns>A new, empty root node with the deserialized nodes attached beneath it.</returns>
        /// <exception cref="EndOfStreamException">The stream ended in the middle of a node record.</exception>
        public static VectorNode DeserializeTree(Stream indexStream, Stream vectorStream, long indexLength, IModel model)
        {
            VectorNode root   = new VectorNode();
            VectorNode cursor = root;
            var        tail   = new Stack<VectorNode>();
            var        buf    = new byte[VectorNode.BlockSize];

            // Counted as long so it can be compared with indexLength without overflowing.
            long read = 0;

            while (read < indexLength)
            {
                // Stream.Read may deliver fewer bytes than requested, so keep reading until the
                // record is complete instead of deserializing a partially filled buffer.
                var filled = 0;

                while (filled < buf.Length)
                {
                    var count = indexStream.Read(buf, filled, buf.Length - filled);

                    if (count == 0)
                    {
                        throw new EndOfStreamException("Index stream ended before a complete node record could be read.");
                    }

                    filled += count;
                }

                var node = DeserializeNode(buf, vectorStream, model);

                // NOTE(review): the attachment point is chosen from the terminator of the node just read, and a
                // childless node read while the stack is empty is not attached anywhere — confirm this matches
                // the layout produced by the serializer.
                if (node.Terminator == 0) // there is both a left and a right child
                {
                    cursor.Left = node;
                    tail.Push(cursor);
                }
                else if (node.Terminator == 1) // there is a left but no right child
                {
                    cursor.Left = node;
                }
                else if (node.Terminator == 2) // there is a right but no left child
                {
                    cursor.Right = node;
                }
                else // there are no children
                {
                    if (tail.Count > 0)
                    {
                        tail.Pop().Right = node;
                    }
                }

                cursor = node;
                read  += VectorNode.BlockSize;
            }

            return root;
        }