/// <summary>
/// Compares two strings directly: takes the first item that <c>model.Tokenize</c> yields
/// for each string, wraps it in a <see cref="VectorNode"/>, and prints the value
/// returned by <c>model.CosAngle</c> for the two node vectors.
/// </summary>
/// <param name="first">Text whose first tokenized item is used as the left operand.</param>
/// <param name="second">Text whose first tokenized item is used as the right operand.</param>
/// <param name="model">Model supplying tokenization and the CosAngle comparison.</param>
private static void CompareBaseless(string first, string second, IStringModel model)
{
    var leftEmbedding = model.Tokenize(first.ToCharArray()).First();
    var left = new VectorNode(leftEmbedding);

    var rightEmbedding = model.Tokenize(second.ToCharArray()).First();
    var right = new VectorNode(rightEmbedding);

    var similarity = model.CosAngle(left.Vector, right.Vector);

    Console.WriteLine($"similarity (baseless): {similarity}");
}
/// <summary>
/// Compares two strings through a set of base vectors and prints the results.
/// </summary>
/// <remarks>
/// Steps performed:
/// 1. Build one unit vector per dimension (1 at the dimension's index, 0 elsewhere) and a
///    reference vector whose components are 1 at index 0 and log10(index) elsewhere.
/// 2. Take the first item that <c>model.Tokenize</c> yields for each string.
/// 3. Re-express each document as the list of CosAngle values against every unit vector.
/// 4. Print the CosAngle between the two re-expressed documents, the CosAngle of each
///    against the reference vector, and the min/max ratio of those two values.
/// </remarks>
/// <param name="first">First text to compare.</param>
/// <param name="second">Second text to compare.</param>
/// <param name="model">Model supplying vector width, tokenization and CosAngle.</param>
private static void Compare(string first, string second, IStringModel model)
{
    var width = model.VectorWidth;
    var referenceComponents = new List<float>(width);
    var unitVectors = new List<IVector>();

    for (int dimension = 0; dimension < width; dimension++)
    {
        float component;

        if (dimension == 0)
        {
            component = 1f;
        }
        else
        {
            component = Convert.ToSingle(Math.Log10(dimension));
        }

        referenceComponents.Add(component);

        // Start from an all-zero list of the full width and switch on a single component.
        var unit = new List<float>(new float[width]);
        unit[dimension] = 1f;

        unitVectors.Add(new IndexedVector(unit, width));
    }

    var referenceVector = new IndexedVector(referenceComponents, width);

    var left = new VectorNode(model.Tokenize(first.ToCharArray()).First());
    var right = new VectorNode(model.Tokenize(second.ToCharArray()).First());

    var leftProjection = new List<float>();
    var rightProjection = new List<float>();

    for (int index = 0; index < unitVectors.Count; index++)
    {
        var unitVector = unitVectors[index];

        leftProjection.Add(Convert.ToSingle(model.CosAngle(left.Vector, unitVector)));
        rightProjection.Add(Convert.ToSingle(model.CosAngle(right.Vector, unitVector)));
    }

    var leftVector = new IndexedVector(leftProjection, width);
    var rightVector = new IndexedVector(rightProjection, width);

    var similarity = model.CosAngle(leftVector, rightVector);
    var leftToReference = model.CosAngle(leftVector, referenceVector);
    var rightToReference = model.CosAngle(rightVector, referenceVector);

    Console.WriteLine($"similarity: {similarity}");
    Console.WriteLine($"bvector similarity 1: {leftToReference}");
    Console.WriteLine($"bvector similarity 2: {rightToReference}");

    var smaller = Math.Min(leftToReference, rightToReference);
    var larger = Math.Max(leftToReference, rightToReference);

    Console.WriteLine($"base vector similarity: {smaller / larger}");
}
/// <summary>
/// Tokenizes the field values of the supplied documents and enqueues the string form of
/// every resulting embedding on <c>_httpQueue</c>.
/// </summary>
/// <remarks>
/// A field is skipped when its key starts with "__", when its key id is listed in
/// <paramref name="excludeKeyIds"/>, or when its value is null.
/// </remarks>
/// <param name="documents">Documents whose fields are tokenized.</param>
/// <param name="excludeKeyIds">Key ids whose fields must not be processed.</param>
public void Warmup(IEnumerable<IDictionary> documents, params long[] excludeKeyIds)
{
    foreach (var doc in documents)
    {
        foreach (var key in doc.Keys)
        {
            var strKey = key.ToString();

            // The "__" prefix is a literal marker, so compare ordinally rather than
            // with the culture-sensitive default of StartsWith(string).
            if (strKey.StartsWith("__", StringComparison.Ordinal))
            {
                continue;
            }

            var keyId = SessionFactory.GetKeyId(CollectionId, strKey.ToHash());

            if (excludeKeyIds.Contains(keyId))
            {
                continue;
            }

            var value = doc[key];

            if (value == null)
            {
                // Nothing to tokenize; previously this raised a NullReferenceException.
                continue;
            }

            var terms = _tokenizer.Tokenize(value.ToString());

            foreach (var token in terms.Embeddings.Select(t => t.ToString()))
            {
                _httpQueue.Enqueue(token);
            }
        }
    }
}
/// <summary>
/// Interactive console session in two phases. First, every non-blank line entered is
/// tokenized and its first item is merged into (or added to) a graph rooted at a new
/// <see cref="VectorNode"/>. After a blank line the graph visualization is printed.
/// Second, every non-blank line entered is used as a query against the graph and the
/// score and node of the closest match are printed; a blank line ends the session.
/// </summary>
/// <param name="model">Model supplying tokenization and the fold/identical angles.</param>
private static void RunInteractiveGraphBuilder(IStringModel model)
{
    // Writes the prompt, then returns whatever the user typed.
    string Prompt(string message)
    {
        Console.WriteLine(message);
        return Console.ReadLine();
    }

    var root = new VectorNode();

    for (var text = Prompt("enter text:");
         !string.IsNullOrWhiteSpace(text);
         text = Prompt("enter text:"))
    {
        var embedding = model.Tokenize(text.ToCharArray()).First();
        var node = new VectorNode(embedding);

        GraphBuilder.MergeOrAdd(root, node, model, model.FoldAngle, model.IdenticalAngle);
    }

    Console.WriteLine(root.Visualize());

    for (var query = Prompt("enter query:");
         !string.IsNullOrWhiteSpace(query);
         query = Prompt("enter query:"))
    {
        var queryEmbedding = model.Tokenize(query.ToCharArray()).First();
        var hit = PathFinder.ClosestMatch(root, queryEmbedding, model);

        Console.WriteLine($"{hit.Score} {hit.Node}");
    }
}
/// <summary>
/// Tokenizes <paramref name="value"/> and merges or adds one node per resulting vector,
/// each carrying <paramref name="docId"/>, into the column registered for
/// <paramref name="keyId"/> in <c>_index</c>.
/// </summary>
/// <param name="docId">Document id passed to each new node.</param>
/// <param name="keyId">Key id selecting the column in <c>_index</c>.</param>
/// <param name="value">Text to tokenize.</param>
public void Put(long docId, long keyId, string value)
{
    var embeddings = _model.Tokenize(value.ToCharArray());

    // Fetches the column root for this key, registering a new empty one if needed.
    var columnRoot = _index.GetOrAdd(keyId, new VectorNode());

    foreach (var embedding in embeddings)
    {
        var node = new VectorNode(embedding, docId);

        GraphBuilder.MergeOrAdd(
            columnRoot,
            node,
            _model,
            _model.FoldAngle,
            _model.IdenticalAngle);
    }
}
/// <summary>
/// Tokenizes <paramref name="value"/> and appends one node per resulting vector, via
/// <c>GraphBuilder.AppendSynchronized</c>, to the column registered for
/// <paramref name="keyId"/> in <c>_index</c>.
/// </summary>
/// <param name="keyId">Key id selecting the column in <c>_index</c>.</param>
/// <param name="value">Text to tokenize.</param>
public void Put(long keyId, string value)
{
    var embeddings = _model.Tokenize(value.ToCharArray());

    // Fetches the column root for this key, registering a new empty one if needed.
    var columnRoot = _index.GetOrAdd(keyId, new VectorNode());

    // Vectors are appended one at a time, in the order the tokenizer yields them.
    foreach (var embedding in embeddings)
    {
        var node = new VectorNode(embedding);

        GraphBuilder.AppendSynchronized(
            columnRoot,
            node,
            _model,
            _model.FoldAngle,
            _model.IdenticalAngle);
    }
}
/// <summary>
/// Builds one <see cref="Term"/> per token of <paramref name="value"/> for the given
/// collection and key.
/// </summary>
/// <param name="collectionName">Collection name; hashed to obtain the collection id.</param>
/// <param name="key">Key name; its hash is used to look up the key id.</param>
/// <param name="value">Text to tokenize.</param>
/// <param name="and">Flag passed through to every created term.</param>
/// <param name="or">Flag passed through to every created term.</param>
/// <param name="not">Flag passed through to every created term.</param>
/// <returns>
/// The created terms, or an empty list when the session factory has no key id for
/// <paramref name="key"/> in the collection.
/// </returns>
private IList<Term> ParseTerms(string collectionName, string key, string value, bool and, bool or, bool not)
{
    var collectionId = collectionName.ToHash();
    var result = new List<Term>();
    long keyId;

    if (!_sessionFactory.TryGetKeyId(collectionId, key.ToHash(), out keyId))
    {
        // Unknown key: there is nothing to build terms for.
        return result;
    }

    foreach (var token in _model.Tokenize(value.ToCharArray()))
    {
        result.Add(new Term(collectionId, keyId, key, token, and, or, not));
    }

    return result;
}
/// <summary>
/// Parses a multi-line query string into a chain of <see cref="Query"/> clauses.
/// </summary>
/// <remarks>
/// Each non-empty line must have the form <c>key:value</c>. Only the first colon separates
/// key from value, so the value itself may contain colons. A key starting with '+' sets
/// <c>And</c>, a key starting with '-' sets <c>Not</c>, and any other key sets <c>Or</c>.
/// The value is tokenized with <paramref name="model"/>; the first clause query refers to
/// embedding index 0 and one additional query per further embedding is linked through
/// <c>NextTermInClause</c>. Clauses from successive lines are linked through
/// <c>NextClause</c>.
/// </remarks>
/// <param name="collectionId">Collection id passed to every created query.</param>
/// <param name="query">Query text, one <c>key:value</c> clause per line.</param>
/// <param name="model">Model used to tokenize each value.</param>
/// <returns>The first clause, or null when the query contains no non-empty lines.</returns>
/// <exception cref="ArgumentException">
/// A line has no colon, or has nothing before its first colon.
/// </exception>
public Query Parse(ulong collectionId, string query, IStringModel model)
{
    const string syntaxError =
        "Query syntax error. A query must define both a key and a value separated by a colon.";

    Query root = null;
    Query cursor = null;

    // Treat both "\r" and "\n" as line breaks; empty entries (e.g. from "\r\n") are dropped.
    var lines = query
        .Replace("\r", "\n")
        .Split('\n', StringSplitOptions.RemoveEmptyEntries);

    foreach (var line in lines)
    {
        var separator = line.IndexOf(':');

        if (separator < 0)
        {
            throw new ArgumentException(syntaxError, nameof(query));
        }

        // Split on the FIRST colon only so values such as "http://host" stay intact.
        var key = line.Substring(0, separator);
        var value = line.Substring(separator + 1);

        if (key.Length == 0)
        {
            // A line like ":value" has no key; report it instead of failing on key[0].
            throw new ArgumentException(syntaxError, nameof(query));
        }

        var values = model.Tokenize(value);

        var and = key[0] == '+';
        var not = key[0] == '-';
        var or = !and && !not;

        if (Operators.Contains(key[0]))
        {
            // Strip the leading operator character from the key name.
            key = key.Substring(1);
        }

        var q = new Query(collectionId, new Term(key, values, 0)) { And = and, Or = or, Not = not };
        var qc = q;

        // Chain one query per remaining embedding of this clause.
        for (int i = 1; i < values.Embeddings.Count; i++)
        {
            qc.NextTermInClause = new Query(collectionId, new Term(key, values, i)) { And = and, Or = or, Not = not };
            qc = qc.NextTermInClause;
        }

        if (cursor == null)
        {
            root = q;
        }
        else
        {
            // Walk to the end of the clause chain starting at the cursor and append.
            var last = cursor;
            var next = last.NextClause;

            while (next != null)
            {
                last = next;
                next = last.NextClause;
            }

            last.NextClause = q;
        }

        cursor = q;
    }

    return root;
}