/// <summary>
/// Prints several cosine-based similarity figures for two strings to the console.
/// </summary>
/// <remarks>
/// For every dimension of the model a vector holding 1 at that index and 0 elsewhere is
/// built. The first token of each input is compared to each of those vectors, and the
/// resulting values form a new vector per input. Those two vectors are then compared
/// with each other and with a reference vector whose components are 1 at index 0 and
/// log10(index) at every other index.
/// </remarks>
/// <param name="first">First text; only its first token is used.</param>
/// <param name="second">Second text; only its first token is used.</param>
/// <param name="model">Model supplying tokenization, vector width and the cosine measure.</param>
private static void Compare(string first, string second, IStringModel model)
{
    var referenceComponents = new List<float>(model.VectorWidth);
    var axisVectors = new List<IVector>();

    for (int dimension = 0; dimension < model.VectorWidth; dimension++)
    {
        // Index 0 is special-cased to 1; every other index contributes log10(index).
        float referenceComponent = dimension == 0
            ? 1
            : Convert.ToSingle(Math.Log10(dimension));
        referenceComponents.Add(referenceComponent);

        // Vector with a single 1 at position `dimension`.
        var axisComponents = new List<float>(model.VectorWidth);
        for (int position = 0; position < model.VectorWidth; position++)
        {
            axisComponents.Add(position == dimension ? 1f : 0f);
        }

        axisVectors.Add(new IndexedVector(axisComponents, model.VectorWidth));
    }

    var referenceVector = new IndexedVector(referenceComponents, model.VectorWidth);

    var firstToken = model.Tokenize(first.ToCharArray()).First();
    var firstNode = new VectorNode(firstToken);
    var secondToken = model.Tokenize(second.ToCharArray()).First();
    var secondNode = new VectorNode(secondToken);

    var firstProjection = new List<float>();
    var secondProjection = new List<float>();

    foreach (var axis in axisVectors)
    {
        var firstCos = model.CosAngle(firstNode.Vector, axis);
        firstProjection.Add(Convert.ToSingle(firstCos));

        var secondCos = model.CosAngle(secondNode.Vector, axis);
        secondProjection.Add(Convert.ToSingle(secondCos));
    }

    var firstProjected = new IndexedVector(firstProjection, model.VectorWidth);
    var secondProjected = new IndexedVector(secondProjection, model.VectorWidth);

    var similarity = model.CosAngle(firstProjected, secondProjected);
    var firstToReference = model.CosAngle(firstProjected, referenceVector);
    var secondToReference = model.CosAngle(secondProjected, referenceVector);

    // Ratio of the smaller reference similarity to the larger one.
    var referenceRatio =
        Math.Min(firstToReference, secondToReference) /
        Math.Max(firstToReference, secondToReference);

    Console.WriteLine($"similarity: {similarity}");
    Console.WriteLine($"bvector similarity 1: {firstToReference}");
    Console.WriteLine($"bvector similarity 2: {secondToReference}");
    Console.WriteLine($"base vector similarity: {referenceRatio}");
}
/// <summary>
/// Prints the cosine similarity between the first token of each input string,
/// comparing the token vectors directly.
/// </summary>
/// <param name="first">First text; only its first token is used.</param>
/// <param name="second">Second text; only its first token is used.</param>
/// <param name="model">Model supplying tokenization and the cosine measure.</param>
private static void CompareBaseless(string first, string second, IStringModel model)
{
    var firstToken = model.Tokenize(first.ToCharArray()).First();
    var firstNode = new VectorNode(firstToken);

    var secondToken = model.Tokenize(second.ToCharArray()).First();
    var secondNode = new VectorNode(secondToken);

    var similarity = model.CosAngle(firstNode.Vector, secondNode.Vector);

    Console.WriteLine($"similarity (baseless): {similarity}");
}
/// <summary>
/// Descends the tree from <paramref name="root"/> and returns the visited node whose
/// vector scores highest against <paramref name="vector"/>.
/// </summary>
/// <param name="root">Node to start from; when null the result has a null node and score 0.</param>
/// <param name="vector">Query vector.</param>
/// <param name="model">Supplies the cosine measure and the fold threshold.</param>
/// <returns>
/// A <see cref="Hit"/> holding the best score seen and the node that produced it.
/// If no visited node scores above 0, the node is <paramref name="root"/> and the score is 0.
/// </returns>
public static Hit ClosestMatch(VectorNode root, Vector vector, IStringModel model)
{
    var winner = root;
    float topScore = 0;

    for (var node = root; node != null;)
    {
        var similarity = model.CosAngle(vector, node.Vector);

        // The best-so-far bookkeeping is the same whichever way we descend.
        if (similarity > topScore)
        {
            topScore = similarity;
            winner = node;
        }

        // Above the fold threshold we continue left, otherwise right.
        node = similarity > model.FoldAngle ? node.Left : node.Right;
    }

    return new Hit { Score = topScore, Node = winner };
}
/// <summary>
/// Places <paramref name="node"/> into the tree rooted at <paramref name="root"/>:
/// it is either merged into an existing node or attached as a new left/right child.
/// </summary>
/// <param name="root">Tree root to start the descent from.</param>
/// <param name="node">Node to insert.</param>
/// <param name="model">Supplies the cosine measure and the identical/fold thresholds.</param>
/// <returns>
/// <c>true</c> when the node was attached as a new child; <c>false</c> when it was merged
/// into an existing node, or when <paramref name="root"/> is null.
/// </returns>
public static bool Add(VectorNode root, VectorNode node, IStringModel model)
{
    var current = root;

    while (current != null)
    {
        // A node with an empty vector is scored 0 without consulting the model.
        var similarity = current.Vector.Count > 0
            ? model.CosAngle(node.Vector, current.Vector)
            : 0;

        if (similarity >= model.IdenticalAngle)
        {
            lock (current.Sync)
            {
                Merge(current, node);
                return false;
            }
        }

        if (similarity > model.FoldAngle)
        {
            // Above the fold threshold: the node belongs somewhere on the left.
            if (current.Left != null)
            {
                current = current.Left;
                continue;
            }

            lock (current.Sync)
            {
                // The slot is checked again now that the lock is held.
                if (current.Left != null)
                {
                    current = current.Left;
                    continue;
                }

                node.AngleWhenAdded = similarity;
                current.Left = node;
                return true;
            }
        }

        // At or below the fold threshold: the node belongs somewhere on the right.
        if (current.Right != null)
        {
            current = current.Right;
            continue;
        }

        lock (current.Sync)
        {
            // The slot is checked again now that the lock is held.
            if (current.Right != null)
            {
                current = current.Right;
                continue;
            }

            node.AngleWhenAdded = similarity;
            current.Right = node;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Scans a serialized tree page for the node whose vector scores highest against
/// <paramref name="vector"/>.
/// </summary>
/// <remarks>
/// Nodes are read from <paramref name="indexStream"/> as fixed-size blocks of
/// <c>VectorNode.BlockSize</c> bytes. This method reads four 64-bit values from each block:
/// the vector offset (bytes 0-7), the postings offset (bytes 8-15), the component count
/// (bytes 16-23) and the terminator (bytes 32-39). Bytes 24-31 are not used here.
/// The terminator tells which children follow the node in the stream:
/// 0 = left and right, 1 = left only, 2 = right only, anything else = no children.
/// A block is only processed when it could be read completely; a short read at the end
/// of the stream ends the scan instead of being parsed together with stale bytes.
/// </remarks>
/// <param name="vector">Query vector.</param>
/// <param name="indexStream">Stream positioned at the first node block of the page.</param>
/// <param name="vectorStream">Stream the node vectors are deserialized from.</param>
/// <param name="model">Supplies vector deserialization, the cosine measure and thresholds.</param>
/// <returns>
/// A <see cref="Hit"/> with the best score and a node carrying the postings offsets of every
/// visited node that achieved that score; the node is null when no block could be read.
/// </returns>
private Hit ClosestMatchInPage(
    Vector vector,
    Stream indexStream,
    Stream vectorStream,
    IStringModel model)
{
    // Byte positions of the fields this method reads from a node block.
    const int vectorOffsetPosition = 0;
    const int postingsOffsetPosition = sizeof(long);
    const int componentCountPosition = 2 * sizeof(long);
    const int terminatorPosition = 4 * sizeof(long);

    // Terminator values describing which children follow a node in the stream.
    const long hasBothChildren = 0;
    const long hasLeftChildOnly = 1;
    const long hasRightChildOnly = 2;

    Span<byte> block = stackalloc byte[VectorNode.BlockSize];
    VectorNode best = null;
    float highscore = 0;

    while (TryFillBlock(indexStream, block))
    {
        var vecOffset = BitConverter.ToInt64(block.Slice(vectorOffsetPosition, sizeof(long)));
        var componentCount = BitConverter.ToInt64(block.Slice(componentCountPosition, sizeof(long)));
        var cursorVector = model.DeserializeVector(vecOffset, (int)componentCount, vectorStream);
        var cursorTerminator = BitConverter.ToInt64(block.Slice(terminatorPosition, sizeof(long)));
        var postingsOffset = BitConverter.ToInt64(block.Slice(postingsOffsetPosition, sizeof(long)));
        var angle = model.CosAngle(cursorVector, vector);

        // Best-candidate bookkeeping is the same for every traversal direction.
        if (best == null || angle > highscore)
        {
            highscore = angle;
            best = new VectorNode(cursorVector)
            {
                PostingsOffsets = new List<long> { postingsOffset }
            };
        }
        else if (angle == highscore)
        {
            // Exact tie: collect this node's postings as well. The list is never null
            // here because it is assigned whenever `best` is created above.
            best.PostingsOffsets.Add(postingsOffset);
        }

        if (angle >= model.IdenticalAngle)
        {
            // Identical match found; nothing further down can matter.
            break;
        }

        if (angle > model.FoldAngle)
        {
            // We want to go left. When a left child exists it is the very next block,
            // so the next loop iteration reads it.
            bool canGoLeft = cursorTerminator == hasBothChildren
                || cursorTerminator == hasLeftChildOnly;

            if (!canGoLeft)
            {
                break;
            }
        }
        else if (cursorTerminator == hasBothChildren)
        {
            // We want to go right, but the next block is the left child:
            // skip over the whole left subtree to reach the right child.
            SkipTree(indexStream);
        }
        else if (cursorTerminator != hasRightChildOnly)
        {
            // There is no right child.
            break;
        }

        // Otherwise the next block is the right child, which is where we want to go.
    }

    return new Hit { Score = highscore, Node = best };

    // Fills `buffer` completely from `stream`; returns false when the stream ends first.
    // Stream.Read may deliver fewer bytes than requested, hence the loop.
    bool TryFillBlock(Stream stream, Span<byte> buffer)
    {
        var filled = 0;

        while (filled < buffer.Length)
        {
            var count = stream.Read(buffer.Slice(filled));

            if (count <= 0)
            {
                break;
            }

            filled += count;
        }

        return filled > 0 && filled == buffer.Length;
    }
}