Example #1
0
        /// <summary>
        /// Prints the value of <c>model.CosAngle</c> between the first token vector
        /// of <paramref name="first"/> and the first token vector of <paramref name="second"/>.
        /// </summary>
        /// <param name="first">Text whose first tokenized vector is the left operand.</param>
        /// <param name="second">Text whose first tokenized vector is the right operand.</param>
        /// <param name="model">Model used for both tokenization and the angle computation.</param>
        private static void CompareBaseless(string first, string second, IStringModel model)
        {
            // Only the first vector of each tokenization takes part in the comparison.
            var leftVector = model.Tokenize(first.ToCharArray()).First();
            var leftNode = new VectorNode(leftVector);

            var rightVector = model.Tokenize(second.ToCharArray()).First();
            var rightNode = new VectorNode(rightVector);

            var similarity = model.CosAngle(leftNode.Vector, rightNode.Vector);

            Console.WriteLine($"similarity (baseless): {similarity}");
        }
Example #2
0
        /// <summary>
        /// Compares two texts after projecting each onto a set of one-hot base vectors,
        /// then prints several similarity figures to the console.
        /// </summary>
        /// <remarks>
        /// For every dimension of the model one base vector is built whose only non-zero
        /// component (1) sits at that dimension. Each document is re-expressed as the list of
        /// <c>model.CosAngle</c> values against those base vectors. A further reference vector
        /// is built with component 1 at index 0 and <c>log10(i)</c> at every other index i.
        /// </remarks>
        /// <param name="first">Text whose first tokenized vector is document 1.</param>
        /// <param name="second">Text whose first tokenized vector is document 2.</param>
        /// <param name="model">Model supplying tokenization, vector width and the angle function.</param>
        private static void Compare(string first, string second, IStringModel model)
        {
            var referenceComponents = new List<float>(model.VectorWidth);
            var unitVectors = new List<IVector>();

            for (int axis = 0; axis < model.VectorWidth; axis++)
            {
                // Index 0 is special-cased to 1; log10(0) is not used.
                float referenceComponent = axis == 0 ? 1 : Convert.ToSingle(Math.Log10(axis));
                referenceComponents.Add(referenceComponent);

                // One-hot vector: 1 at position `axis`, 0 everywhere else.
                var oneHot = new List<float>(model.VectorWidth);

                for (int column = 0; column < model.VectorWidth; column++)
                {
                    oneHot.Add(column == axis ? 1f : 0f);
                }

                unitVectors.Add(new IndexedVector(oneHot, model.VectorWidth));
            }

            var referenceVector = new IndexedVector(referenceComponents, model.VectorWidth);

            var firstNode = new VectorNode(model.Tokenize(first.ToCharArray()).First());
            var secondNode = new VectorNode(model.Tokenize(second.ToCharArray()).First());
            var firstProjection = new List<float>();
            var secondProjection = new List<float>();

            // Express each document by its angle to every base vector.
            foreach (var unit in unitVectors)
            {
                firstProjection.Add(Convert.ToSingle(model.CosAngle(firstNode.Vector, unit)));
                secondProjection.Add(Convert.ToSingle(model.CosAngle(secondNode.Vector, unit)));
            }

            var firstProjected = new IndexedVector(firstProjection, model.VectorWidth);
            var secondProjected = new IndexedVector(secondProjection, model.VectorWidth);

            var similarity = model.CosAngle(firstProjected, secondProjected);
            var firstToReference = model.CosAngle(firstProjected, referenceVector);
            var secondToReference = model.CosAngle(secondProjected, referenceVector);

            // Ratio of the smaller to the larger reference similarity.
            var referenceRatio = Math.Min(firstToReference, secondToReference) / Math.Max(firstToReference, secondToReference);

            Console.WriteLine($"similarity: {similarity}");
            Console.WriteLine($"bvector similarity 1: {firstToReference}");
            Console.WriteLine($"bvector similarity 2: {secondToReference}");
            Console.WriteLine($"base vector similarity: {referenceRatio}");
        }
Example #3
0
        /// <summary>
        /// Tokenizes the field values of the given documents and enqueues the string form
        /// of every resulting embedding on <c>_httpQueue</c>.
        /// </summary>
        /// <remarks>
        /// Fields whose key starts with "__" are skipped, as are fields whose key id is
        /// listed in <paramref name="excludeKeyIds"/> and fields whose value is null.
        /// </remarks>
        /// <param name="documents">Documents whose fields should be processed.</param>
        /// <param name="excludeKeyIds">Key ids of fields that must not be processed.</param>
        public void Warmup(IEnumerable <IDictionary> documents, params long[] excludeKeyIds)
        {
            foreach (var doc in documents)
            {
                foreach (var key in doc.Keys)
                {
                    var strKey = key.ToString();

                    // Ordinal comparison: the "__" prefix is an identifier convention, not
                    // linguistic text, so culture-specific matching rules must not apply.
                    if (strKey.StartsWith("__", System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    var keyId = SessionFactory.GetKeyId(CollectionId, strKey.ToHash());

                    if (excludeKeyIds.Contains(keyId))
                    {
                        continue;
                    }

                    // A null field value has nothing to tokenize; skip it instead of
                    // failing with a NullReferenceException on ToString().
                    var fieldValue = doc[key];

                    if (fieldValue == null)
                    {
                        continue;
                    }

                    var terms = _tokenizer.Tokenize(fieldValue.ToString());

                    foreach (var token in terms.Embeddings
                             .Select(t => t.ToString()))
                    {
                        _httpQueue.Enqueue(token);
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Interactive console session in two phases: first every entered line is merged into
        /// a vector graph, then every entered query is matched against that graph.
        /// </summary>
        /// <remarks>
        /// Each phase ends when an empty or whitespace-only line (or end of input) is read.
        /// The graph is printed via <c>Visualize()</c> between the two phases.
        /// </remarks>
        /// <param name="model">Model used for tokenization, graph building and matching.</param>
        private static void RunInteractiveGraphBuilder(IStringModel model)
        {
            // Writes the prompt, then returns the next line typed by the user.
            string Ask(string prompt)
            {
                Console.WriteLine(prompt);
                return Console.ReadLine();
            }

            var graph = new VectorNode();

            // Phase 1: build the graph from the first token vector of each entered line.
            for (var text = Ask("enter text:"); !string.IsNullOrWhiteSpace(text); text = Ask("enter text:"))
            {
                var textVector = model.Tokenize(text.ToCharArray()).First();

                GraphBuilder.MergeOrAdd(graph, new VectorNode(textVector), model, model.FoldAngle, model.IdenticalAngle);
            }

            Console.WriteLine(graph.Visualize());

            // Phase 2: look up the closest match for the first token vector of each query.
            for (var query = Ask("enter query:"); !string.IsNullOrWhiteSpace(query); query = Ask("enter query:"))
            {
                var queryVector = model.Tokenize(query.ToCharArray()).First();
                var hit = PathFinder.ClosestMatch(graph, queryVector, model);

                Console.WriteLine($"{hit.Score} {hit.Node}");
            }
        }
Example #5
0
        /// <summary>
        /// Tokenizes <paramref name="value"/> and merges one node per resulting vector,
        /// tagged with <paramref name="docId"/>, into the index column for <paramref name="keyId"/>.
        /// </summary>
        /// <param name="docId">Document id passed to every created <c>VectorNode</c>.</param>
        /// <param name="keyId">Key identifying the column; the column is created if absent.</param>
        /// <param name="value">Text to tokenize.</param>
        public void Put(long docId, long keyId, string value)
        {
            var embeddings = _model.Tokenize(value.ToCharArray());
            var columnRoot = _index.GetOrAdd(keyId, new VectorNode());

            foreach (var embedding in embeddings)
            {
                var node = new VectorNode(embedding, docId);

                GraphBuilder.MergeOrAdd(columnRoot, node, _model, _model.FoldAngle, _model.IdenticalAngle);
            }
        }
Example #6
0
        /// <summary>
        /// Tokenizes <paramref name="value"/> and appends one node per resulting vector
        /// to the index column for <paramref name="keyId"/>.
        /// </summary>
        /// <param name="keyId">Key identifying the column; the column is created if absent.</param>
        /// <param name="value">Text to tokenize.</param>
        public void Put(long keyId, string value)
        {
            var embeddings = _model.Tokenize(value.ToCharArray());
            var columnRoot = _index.GetOrAdd(keyId, new VectorNode());

            // Vectors are appended one at a time, in tokenization order.
            foreach (var embedding in embeddings)
            {
                var node = new VectorNode(embedding);

                GraphBuilder.AppendSynchronized(columnRoot, node, _model, _model.FoldAngle, _model.IdenticalAngle);
            }
        }
Example #7
0
        /// <summary>
        /// Builds one <c>Term</c> per token of <paramref name="value"/> for the given collection and key.
        /// </summary>
        /// <param name="collectionName">Collection name; its hash is used as the collection id.</param>
        /// <param name="key">Field name; its hash is used to look up the key id.</param>
        /// <param name="value">Text to tokenize into terms.</param>
        /// <param name="and">Passed through to every created term.</param>
        /// <param name="or">Passed through to every created term.</param>
        /// <param name="not">Passed through to every created term.</param>
        /// <returns>
        /// The created terms, or an empty list when no key id is registered for
        /// <paramref name="key"/> in the collection.
        /// </returns>
        private IList <Term> ParseTerms(string collectionName, string key, string value, bool and, bool or, bool not)
        {
            var collectionId = collectionName.ToHash();
            var result = new List<Term>();
            long keyId;

            // Unknown key: nothing can match, so return the empty list.
            if (!_sessionFactory.TryGetKeyId(collectionId, key.ToHash(), out keyId))
            {
                return result;
            }

            foreach (var token in _model.Tokenize(value.ToCharArray()))
            {
                result.Add(new Term(collectionId, keyId, key, token, and, or, not));
            }

            return result;
        }
Example #8
0
        /// <summary>
        /// Parses a multi-line query string into a linked chain of <c>Query</c> clauses.
        /// </summary>
        /// <remarks>
        /// Every non-empty line must have the form <c>key:value</c>. The key may start with
        /// '+' (AND) or '-' (NOT); any other first character makes the clause an OR clause.
        /// The line is split on its FIRST colon only, so values may themselves contain colons.
        /// The value is tokenized with <paramref name="model"/>; the first embedding becomes
        /// the head of the clause and each further embedding is chained through
        /// <c>NextTermInClause</c>. Clauses from successive lines are chained through
        /// <c>NextClause</c>.
        /// </remarks>
        /// <param name="collectionId">Collection id passed to every created query.</param>
        /// <param name="query">Query text, one clause per line ("\r" and "\n" both separate lines).</param>
        /// <param name="model">Model used to tokenize each clause value.</param>
        /// <returns>The first clause, or <c>null</c> when <paramref name="query"/> has no non-empty lines.</returns>
        /// <exception cref="ArgumentException">
        /// A line contains no colon, or the key before the colon is empty.
        /// </exception>
        public Query Parse(ulong collectionId, string query, IStringModel model)
        {
            Query root   = null;
            Query cursor = null;
            var   lines  = query
                           .Replace("\r", "\n")
                           .Split('\n', StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                var separator = line.IndexOf(':');

                // separator < 0: no colon at all; separator == 0: empty key, which would
                // otherwise fail on key[0] below with an IndexOutOfRangeException.
                if (separator <= 0)
                {
                    throw new ArgumentException(
                              "Query syntax error. A query must define both a key and a value separated by a colon.", nameof(query));
                }

                // Split on the first colon only so that a value such as "http://host"
                // is kept whole instead of being cut at its own colon.
                var key    = line.Substring(0, separator);
                var value  = line.Substring(separator + 1);
                var values = model.Tokenize(value);
                var or     = key[0] != '+' && key[0] != '-';
                var not    = key[0] == '-';
                var and    = !or && !not;

                // Strip the leading operator character from the key.
                if (Operators.Contains(key[0]))
                {
                    key = key.Substring(1);
                }

                var q = new Query(collectionId, new Term(key, values, 0))
                {
                    And = and, Or = or, Not = not
                };
                var qc = q;

                // One additional query per remaining embedding, chained within the clause.
                for (int i = 1; i < values.Embeddings.Count; i++)
                {
                    qc.NextTermInClause = new Query(collectionId, new Term(key, values, i))
                    {
                        And = and, Or = or, Not = not
                    };
                    qc = qc.NextTermInClause;
                }

                if (cursor == null)
                {
                    root = q;
                }
                else
                {
                    // Walk to the end of the clause chain and append the new clause there.
                    var last = cursor;
                    var next = last.NextClause;

                    while (next != null)
                    {
                        last = next;
                        next = last.NextClause;
                    }

                    last.NextClause = q;
                }

                cursor = q;
            }

            return(root);
        }