Esempio n. 1
0
        /// <summary>
        /// Prints several cosine-similarity figures for two strings.
        /// Each string's first token vector is projected onto every unit base vector
        /// of the model's vector space; the resulting projections are compared with
        /// each other and with a reference vector whose components are
        /// 1, log10(1), log10(2), ...
        /// </summary>
        /// <param name="first">First string to compare.</param>
        /// <param name="second">Second string to compare.</param>
        /// <param name="model">Model used for tokenizing and for computing cosine angles.</param>
        private static void Compare(string first, string second, IStringModel model)
        {
            var width               = model.VectorWidth;
            var referenceComponents = new List<float>(width);
            var unitVectors         = new List<IVector>();

            for (var axis = 0; axis < width; axis++)
            {
                // Component 0 is fixed to 1; every other component is log10 of its index.
                referenceComponents.Add(axis == 0 ? 1 : Convert.ToSingle(Math.Log10(axis)));

                // Unit vector for this axis: 1 at position `axis`, 0 everywhere else.
                var unitComponents = new List<float>(width);

                for (var slot = 0; slot < width; slot++)
                {
                    unitComponents.Add(slot == axis ? 1f : 0f);
                }

                unitVectors.Add(new IndexedVector(unitComponents, width));
            }

            var referenceVector = new IndexedVector(referenceComponents, width);

            var firstNode  = new VectorNode(model.Tokenize(first.ToCharArray()).First());
            var secondNode = new VectorNode(model.Tokenize(second.ToCharArray()).First());

            var firstProjection  = new List<float>();
            var secondProjection = new List<float>();

            foreach (var unit in unitVectors)
            {
                firstProjection.Add(Convert.ToSingle(model.CosAngle(firstNode.Vector, unit)));
                secondProjection.Add(Convert.ToSingle(model.CosAngle(secondNode.Vector, unit)));
            }

            var firstProjected  = new IndexedVector(firstProjection, width);
            var secondProjected = new IndexedVector(secondProjection, width);

            var mutual          = model.CosAngle(firstProjected, secondProjected);
            var firstVsReference  = model.CosAngle(firstProjected, referenceVector);
            var secondVsReference = model.CosAngle(secondProjected, referenceVector);

            // Smaller reference similarity divided by the larger one.
            var referenceRatio = Math.Min(firstVsReference, secondVsReference) / Math.Max(firstVsReference, secondVsReference);

            Console.WriteLine($"similarity: {mutual}");
            Console.WriteLine($"bvector similarity 1: {firstVsReference}");
            Console.WriteLine($"bvector similarity 2: {secondVsReference}");
            Console.WriteLine($"base vector similarity: {referenceRatio}");
        }
Esempio n. 2
0
        /// <summary>
        /// Prints the cosine similarity between the first token vectors of two strings,
        /// compared directly with no intermediate projection.
        /// </summary>
        /// <param name="first">First string to compare.</param>
        /// <param name="second">Second string to compare.</param>
        /// <param name="model">Model used for tokenizing and for computing the cosine angle.</param>
        private static void CompareBaseless(string first, string second, IStringModel model)
        {
            var firstToken  = model.Tokenize(first.ToCharArray()).First();
            var firstNode   = new VectorNode(firstToken);

            var secondToken = model.Tokenize(second.ToCharArray()).First();
            var secondNode  = new VectorNode(secondToken);

            var similarity = model.CosAngle(firstNode.Vector, secondNode.Vector);

            Console.WriteLine($"similarity (baseless): {similarity}");
        }
Esempio n. 3
0
        /// <summary>
        /// Walks the tree from <paramref name="root"/> and returns the visited node
        /// whose vector has the highest cosine angle to <paramref name="vector"/>.
        /// </summary>
        /// <param name="root">Node the walk starts from; also the initial best candidate.</param>
        /// <param name="vector">Query vector.</param>
        /// <param name="model">Supplies <c>CosAngle</c> and the <c>FoldAngle</c> threshold.</param>
        /// <returns>
        /// A <see cref="Hit"/> holding the best node and its score. The score stays 0 and
        /// the node stays <paramref name="root"/> when no visited node scores above 0.
        /// </returns>
        public static Hit ClosestMatch(VectorNode root, Vector vector, IStringModel model)
        {
            VectorNode winner   = root;
            float      topScore = 0;
            VectorNode current  = root;

            while (current != null)
            {
                var score = model.CosAngle(vector, current.Vector);

                // Track the best candidate regardless of which way the walk turns next.
                if (score > topScore)
                {
                    topScore = score;
                    winner   = current;
                }

                // Above the fold angle the walk continues left, otherwise right.
                current = score > model.FoldAngle ? current.Left : current.Right;
            }

            return new Hit
            {
                Score = topScore,
                Node  = winner
            };
        }
Esempio n. 4
0
        /// <summary>
        /// Inserts <paramref name="node"/> into the tree rooted at <paramref name="root"/>.
        /// At each visited node the cosine angle between the two vectors decides the next step:
        /// an angle at or above <c>model.IdenticalAngle</c> merges the node into the visited one,
        /// an angle above <c>model.FoldAngle</c> descends left, anything else descends right.
        /// The node is attached at the first empty child slot found along that path.
        /// </summary>
        /// <param name="root">Node the walk starts from.</param>
        /// <param name="node">Node to insert or merge.</param>
        /// <param name="model">Supplies <c>CosAngle</c>, <c>IdenticalAngle</c> and <c>FoldAngle</c>.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="node"/> was attached as a new child;
        /// <c>false</c> when it was merged into an existing node or the walk ended on a null cursor.
        /// </returns>
        public static bool Add(VectorNode root, VectorNode node, IStringModel model)
        {
            var cursor = root;

            while (cursor != null)
            {
                // A cursor whose vector has no components is scored as angle 0.
                var angle = cursor.Vector.Count > 0 ? model.CosAngle(node.Vector, cursor.Vector) : 0;

                if (angle >= model.IdenticalAngle)
                {
                    // Treated as the same entry: merge into the existing node while holding its lock.
                    lock (cursor.Sync)
                    {
                        Merge(cursor, node);

                        return(false);
                    }
                }
                else if (angle > model.FoldAngle)
                {
                    // Above the fold angle: the node belongs in the left subtree.
                    if (cursor.Left == null)
                    {
                        lock (cursor.Sync)
                        {
                            // The slot is checked again once the lock is held
                            // (presumably another thread may have filled it in the meantime).
                            if (cursor.Left == null)
                            {
                                node.AngleWhenAdded = angle;
                                cursor.Left         = node;

                                return(true);
                            }
                            else
                            {
                                // Slot is occupied after all: continue the walk into that child.
                                cursor = cursor.Left;
                            }
                        }
                    }
                    else
                    {
                        cursor = cursor.Left;
                    }
                }
                else
                {
                    // At or below the fold angle: the node belongs in the right subtree.
                    if (cursor.Right == null)
                    {
                        lock (cursor.Sync)
                        {
                            // Same re-check under the lock as for the left slot.
                            if (cursor.Right == null)
                            {
                                node.AngleWhenAdded = angle;
                                cursor.Right        = node;

                                return(true);
                            }
                            else
                            {
                                // Slot is occupied after all: continue the walk into that child.
                                cursor = cursor.Right;
                            }
                        }
                    }
                    else
                    {
                        cursor = cursor.Right;
                    }
                }
            }

            // Reached only when the walk ends on a null cursor, e.g. when root itself is null.
            return(false);
        }
Esempio n. 5
0
        /// <summary>
        /// Scans one serialized index page for the node whose vector has the highest
        /// cosine angle to <paramref name="vector"/>.
        /// Node blocks of <c>VectorNode.BlockSize</c> bytes are read sequentially from
        /// <paramref name="indexStream"/>; each block's vector is loaded from
        /// <paramref name="vectorStream"/> through <c>model.DeserializeVector</c>.
        /// </summary>
        /// <param name="vector">Query vector.</param>
        /// <param name="indexStream">Stream positioned at the first node block of the page.</param>
        /// <param name="vectorStream">Stream the node vectors are deserialized from.</param>
        /// <param name="model">Supplies <c>CosAngle</c>, <c>IdenticalAngle</c>, <c>FoldAngle</c> and vector deserialization.</param>
        /// <returns>
        /// A <see cref="Hit"/> with the best-scoring node and its score. The node is
        /// <c>null</c> and the score 0 when nothing could be read from the index stream.
        /// Postings offsets of later nodes that tie the best score exactly are appended
        /// to the best node's <c>PostingsOffsets</c>.
        /// </returns>
        private Hit ClosestMatchInPage(
            Vector vector,
            Stream indexStream,
            Stream vectorStream,
            IStringModel model
            )
        {
            // Byte positions, inside a node block, of the fields this method reads.
            const int vectorOffsetPosition   = 0;
            const int postingsOffsetPosition = sizeof(long);
            const int componentCountPosition = 2 * sizeof(long);
            const int terminatorPosition     = 4 * sizeof(long);

            Span<byte> block = stackalloc byte[VectorNode.BlockSize];

            // NOTE(review): Stream.Read may return fewer bytes than the block size;
            // like the original, the loop only checks that something was read.
            var read = indexStream.Read(block);

            VectorNode best      = null;
            float      highscore = 0;

            while (read > 0)
            {
                var vecOffset        = BitConverter.ToInt64(block.Slice(vectorOffsetPosition, sizeof(long)));
                var componentCount   = BitConverter.ToInt64(block.Slice(componentCountPosition, sizeof(long)));
                var cursorVector     = model.DeserializeVector(vecOffset, (int)componentCount, vectorStream);
                var cursorTerminator = BitConverter.ToInt64(block.Slice(terminatorPosition, sizeof(long)));
                var postingsOffset   = BitConverter.ToInt64(block.Slice(postingsOffsetPosition, sizeof(long)));
                var angle            = model.CosAngle(cursorVector, vector);

                // Best-candidate bookkeeping is the same whichever way the walk continues.
                if (best == null || angle > highscore)
                {
                    highscore = angle;
                    best      = new VectorNode(cursorVector)
                    {
                        PostingsOffsets = new List<long> { postingsOffset }
                    };
                }
                else if (angle == highscore)
                {
                    // Exact tie with the current best: keep its postings as well.
                    // PostingsOffsets is always set when `best` is created above.
                    best.PostingsOffsets.Add(postingsOffset);
                }

                if (angle >= model.IdenticalAngle)
                {
                    // Identical match: stop scanning.
                    break;
                }

                if (angle > model.FoldAngle)
                {
                    // We want to go left. Terminator 0 means a left and a right child exist,
                    // 1 means just a left child; either way the next block is the left child.
                    bool canGoLeft = cursorTerminator == 0 || cursorTerminator == 1;

                    if (!canGoLeft)
                    {
                        // There is no left child.
                        break;
                    }

                    read = indexStream.Read(block);
                }
                else if (cursorTerminator == 0)
                {
                    // We want to go right and both children exist. The next block is the
                    // left child, so the whole left tree must be skipped first.
                    SkipTree(indexStream);
                    read = indexStream.Read(block);
                }
                else if (cursorTerminator == 2)
                {
                    // The next block is the right child, which is where we want to go.
                    read = indexStream.Read(block);
                }
                else
                {
                    // There is no right child.
                    break;
                }
            }

            return new Hit
            {
                Score = highscore,
                Node  = best
            };
        }