/// <summary>
/// Appends the postings offsets of <paramref name="source"/> to those of <paramref name="target"/>.
/// </summary>
/// <param name="target">Node receiving the offsets. Its <c>PostingsOffsets</c> is cast to <c>List&lt;long&gt;</c>.</param>
/// <param name="source">Node supplying the offsets. Nothing is copied when its <c>PostingsOffsets</c> is null.</param>
public static void MergePostings(this VectorNode target, VectorNode source)
{
    if (source.PostingsOffsets == null)
    {
        return;
    }

    var destination = (List<long>)target.PostingsOffsets;
    destination.AddRange(source.PostingsOffsets);
}
/// <summary>
/// Adds every document id of <paramref name="node"/> to the document ids of <paramref name="target"/>.
/// </summary>
/// <param name="target">Node whose <c>DocIds</c> collection receives the ids.</param>
/// <param name="node">Node whose <c>DocIds</c> are copied. Nothing is copied when its <c>DocIds</c> is null.</param>
public static void AddDocId(VectorNode target, VectorNode node)
{
    // Same null guard as MergeDocIds: a node without document ids contributes nothing.
    if (node.DocIds == null)
    {
        return;
    }

    foreach (var docId in node.DocIds)
    {
        target.DocIds.Add(docId);
    }
}
/// <summary>
/// Writes a record of five consecutive <see cref="long"/> values describing
/// <paramref name="node"/> to <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// Record layout: vector offset, postings offset, vector component count, weight, terminator.
/// The terminator encodes which children the node has:
/// 0 = left and right, 1 = left only, 2 = right only, 3 = no children.
/// </remarks>
/// <param name="node">Node to write.</param>
/// <param name="stream">Destination stream.</param>
public static void Serialize(this VectorNode node, Stream stream)
{
    bool hasLeft = node.Left != null;
    bool hasRight = node.Right != null;

    long terminator;

    if (hasLeft)
    {
        // 0: both children, 1: left child only
        terminator = hasRight ? 0 : 1;
    }
    else
    {
        // 2: right child only, 3: no children
        terminator = hasRight ? 2 : 3;
    }

    Span<long> record = stackalloc long[5];

    record[0] = node.VectorOffset;
    record[1] = node.PostingsOffset;
    record[2] = node.Vector.ComponentCount;
    record[3] = node.Weight;
    record[4] = terminator;

    // The five longs are reinterpreted as raw bytes and written in one call.
    stream.Write(MemoryMarshal.Cast<long, byte>(record));
}
/// <summary>
/// Appends the document ids of <paramref name="source"/> to those of <paramref name="target"/>.
/// </summary>
/// <param name="target">Node whose <c>DocIds</c> receives the ids.</param>
/// <param name="source">Node supplying the ids. Nothing is copied when its <c>DocIds</c> is null.</param>
public static void MergeDocIds(this VectorNode target, VectorNode source)
{
    if (source.DocIds == null)
    {
        return;
    }

    target.DocIds.AddRange(source.DocIds);
}
/// <summary>
/// Assigns consecutive ids, starting at 0, to the <c>PostingsOffset</c> of every node in the tree.
/// </summary>
/// <remarks>
/// Nodes are visited in pre-order: the node itself, then its left subtree, then its right subtree.
/// When <paramref name="root"/> has a component count of 0 the walk starts at <c>root.Right</c>.
/// </remarks>
/// <param name="root">Root of the tree to number.</param>
public static void SetIdsOnAllNodes(VectorNode root)
{
    var first = root.ComponentCount == 0 ? root.Right : root;

    if (first == null)
    {
        return;
    }

    var pending = new Stack<VectorNode>();
    pending.Push(first);

    long nextId = 0;

    while (pending.Count > 0)
    {
        var current = pending.Pop();

        current.PostingsOffset = nextId++;

        // Right is pushed first so that the left child is popped (visited) next.
        if (current.Right != null)
        {
            pending.Push(current.Right);
        }

        if (current.Left != null)
        {
            pending.Push(current.Left);
        }
    }
}
/// <summary>
/// Lazily enumerates every node of the tree in pre-order
/// (node, then left subtree, then right subtree).
/// </summary>
/// <param name="root">
/// Root of the tree. When its component count is 0 the enumeration starts at <c>root.Right</c>.
/// </param>
/// <returns>The nodes of the tree, one at a time.</returns>
public static IEnumerable<VectorNode> All(VectorNode root)
{
    var first = root.ComponentCount == 0 ? root.Right : root;

    if (first == null)
    {
        yield break;
    }

    var pending = new Stack<VectorNode>();
    pending.Push(first);

    while (pending.Count > 0)
    {
        var current = pending.Pop();

        yield return current;

        // Right is pushed first so that the left child is popped (yielded) next.
        if (current.Right != null)
        {
            pending.Push(current.Right);
        }

        if (current.Left != null)
        {
            pending.Push(current.Left);
        }
    }
}
/// <summary>
/// Renders the tree rooted at <paramref name="root"/> as text.
/// </summary>
/// <param name="root">Root of the tree to render.</param>
/// <returns>The text produced by the private <c>Visualize</c> overload, starting at depth 0.</returns>
public static string Visualize(VectorNode root)
{
    var text = new StringBuilder();

    Visualize(root, text, 0);

    return text.ToString();
}
/// <summary>
/// Walks the tree from <paramref name="root"/> and returns the visited node whose vector
/// scored highest against <paramref name="vector"/>.
/// </summary>
/// <remarks>
/// At each node the score is <c>model.CosAngle(vector, node.Vector)</c>, or 0 when the node has no vector.
/// The walk continues to the left child when the score exceeds <c>model.FoldAngle</c>,
/// otherwise to the right child, and stops when there is no such child.
/// </remarks>
/// <param name="root">Node to start from. It is also the reported best node when no score exceeds 0.</param>
/// <param name="vector">Query vector.</param>
/// <param name="model">Supplies <c>CosAngle</c> and <c>FoldAngle</c>.</param>
/// <returns>A <c>Hit</c> built from the best node and its score.</returns>
public static Hit ClosestMatch(VectorNode root, IVector vector, IModel model)
{
    var winner = root;
    double topScore = 0;
    var current = root;

    while (current != null)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(vector, current.Vector);
        var goLeft = angle > model.FoldAngle;

        // The best-so-far bookkeeping is the same whichever way the walk turns.
        if (angle > topScore)
        {
            topScore = angle;
            winner = current;
        }

        current = goLeft ? current.Left : current.Right;
    }

    return new Hit(winner, topScore);
}
/// <summary>
/// Builds a matrix with one one-hot row per visited tree node.
/// </summary>
/// <remarks>
/// The matrix has <c>root.Weight</c> rows and each populated row has <c>root.Weight</c> columns.
/// Nodes are visited in pre-order (node, left subtree, right subtree); the i-th visited node
/// gets a row with a 1 at column i. Rows for which no node was visited stay null.
/// When <paramref name="root"/> has no vector the walk starts at <c>root.Right</c>.
/// </remarks>
/// <param name="root">Root of the tree.</param>
/// <returns>The jagged one-hot matrix.</returns>
public static float[][] AsOneHotMatrix(VectorNode root)
{
    var first = root.Vector == null ? root.Right : root;
    var pending = new Stack<VectorNode>();
    var matrix = new float[root.Weight][];
    var row = 0;

    if (first != null)
    {
        pending.Push(first);
    }

    while (pending.Count > 0)
    {
        var current = pending.Pop();

        var oneHot = new float[root.Weight];
        oneHot[row] = 1;
        matrix[row] = oneHot;
        row++;

        // Right is pushed first so that the left child is visited next.
        if (current.Right != null)
        {
            pending.Push(current.Right);
        }

        if (current.Left != null)
        {
            pending.Push(current.Left);
        }
    }

    return matrix;
}
/// <summary>
/// Serializes <paramref name="column"/> into this writer's <c>_ixStream</c> and records
/// the resulting page in <paramref name="pageIndexWriter"/>.
/// </summary>
/// <param name="column">Tree to serialize.</param>
/// <param name="vectorStream">Stream passed to <c>SerializeTree</c> for vectors.</param>
/// <param name="postingsStream">Stream passed to <c>SerializeTree</c> for postings.</param>
/// <param name="pageIndexWriter">Receives the offset and length of the serialized page.</param>
/// <returns>The result of <c>PathFinder.Size(column)</c>.</returns>
public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, Stream postingsStream, PageIndexWriter pageIndexWriter)
{
    var serialized = column.SerializeTree(_ixStream, vectorStream, postingsStream);
    var pageOffset = serialized.offset;
    var pageLength = serialized.length;

    pageIndexWriter.Put(pageOffset, pageLength);

    return PathFinder.Size(column);
}
/// <summary>
/// Serializes <paramref name="column"/> into this writer's <c>_ixStream</c>, without a postings
/// stream, and records the resulting page in <paramref name="pageIndexWriter"/>.
/// </summary>
/// <param name="column">Tree to serialize.</param>
/// <param name="vectorStream">Stream passed to <c>GraphBuilder.SerializeTree</c> for vectors.</param>
/// <param name="pageIndexWriter">Receives the offset and length of the serialized page.</param>
/// <returns>The result of <c>PathFinder.Size(column)</c>.</returns>
public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, PageIndexWriter pageIndexWriter)
{
    // null: no postings stream is supplied for this page.
    var serialized = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, null);
    var pageOffset = serialized.offset;
    var pageLength = serialized.length;

    pageIndexWriter.Put(pageOffset, pageLength);

    return PathFinder.Size(column);
}
/// <summary>
/// Finds the place for <paramref name="node"/> in the tree and either reports an existing
/// node that is at least <paramref name="identicalAngle"/> close, or attaches
/// <paramref name="node"/> to an empty child slot while holding a lock on the parent.
/// </summary>
/// <remarks>
/// At each visited node the score is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. A score above <paramref name="foldAngle"/> selects the left
/// child, anything else the right child. An empty slot is re-checked inside the lock; if it was
/// filled in the meantime the loop runs again on the same node and then descends into it.
/// </remarks>
/// <param name="root">Root of the tree; its <c>Weight</c> becomes the postings offset of a newly attached node.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c>.</param>
/// <param name="foldAngle">Score above which the walk goes left.</param>
/// <param name="identicalAngle">Score at or above which an existing node is treated as a match.</param>
/// <returns>The postings offset of the matching node, or of <paramref name="node"/> once attached.</returns>
public static long AppendSynchronized(
    VectorNode root,
    VectorNode node,
    IDistanceCalculator model,
    double foldAngle,
    double identicalAngle)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle >= identicalAngle)
        {
            return current.PostingsOffset;
        }

        if (angle > foldAngle)
        {
            if (current.Left != null)
            {
                current = current.Left;
                continue;
            }

            lock (current)
            {
                // Re-check under the lock; on failure fall through and retry this node.
                if (current.Left == null)
                {
                    node.PostingsOffset = root.Weight;
                    current.Left = node;

                    return node.PostingsOffset;
                }
            }
        }
        else
        {
            if (current.Right != null)
            {
                current = current.Right;
                continue;
            }

            lock (current)
            {
                // Re-check under the lock; on failure fall through and retry this node.
                if (current.Right == null)
                {
                    node.PostingsOffset = root.Weight;
                    current.Right = node;

                    return node.PostingsOffset;
                }
            }
        }
    }
}
/// <summary>
/// Appends the document ids of <paramref name="source"/> to those of <paramref name="target"/>
/// while holding a lock on <c>target.Sync</c>.
/// </summary>
/// <param name="target">Node whose <c>DocIds</c> receives the ids and whose <c>Sync</c> object is locked.</param>
/// <param name="source">Node supplying the ids. Nothing is copied when its <c>DocIds</c> is null.</param>
public static void MergeDocIdsConcurrent(this VectorNode target, VectorNode source)
{
    lock (target.Sync)
    {
        // The null check stays inside the lock, exactly where the copy happens.
        if (source.DocIds == null)
        {
            return;
        }

        target.DocIds.AddRange(source.DocIds);
    }
}
/// <summary>
/// Lazily enumerates the chain of left descendants of <paramref name="root"/>:
/// <c>root.Left</c>, <c>root.Left.Left</c>, and so on until a null left child is reached.
/// </summary>
/// <param name="root">Node whose left chain is enumerated; the node itself is not returned.</param>
/// <returns>The nodes on the left chain, nearest first.</returns>
public static IEnumerable<VectorNode> LeftList(VectorNode root)
{
    for (var current = root.Left; current != null; current = current.Left)
    {
        yield return current;
    }
}
/// <summary>
/// Attaches <paramref name="node"/> at the end of the chain of right children that starts at
/// <paramref name="parent"/>.
/// </summary>
/// <param name="parent">Node whose right chain is followed to its last element.</param>
/// <param name="node">
/// Node to attach. Its own <c>Right</c> is overwritten with the last element's <c>Right</c>,
/// which is null at that point.
/// </param>
public static void AddRight(VectorNode parent, VectorNode node)
{
    VectorNode last;

    for (last = parent; last.Right != null; last = last.Right)
    {
        // advance to the last node of the right chain
    }

    // last.Right is null here, so this clears node.Right before linking it in.
    node.Right = last.Right;
    last.Right = node;
}
/// <summary>
/// Counts the nodes on the chain of left descendants of <paramref name="node"/>.
/// </summary>
/// <param name="node">Node whose left chain is measured; the node itself is not counted.</param>
/// <returns>The number of consecutive left children below <paramref name="node"/>.</returns>
public static int Depth(VectorNode node)
{
    var steps = 0;

    for (var current = node.Left; current != null; current = current.Left)
    {
        steps++;
    }

    return steps;
}
/// <summary>
/// Builds a tree from <paramref name="data"/> using supervised merging, retrying nodes that
/// could not be classified and finally inserting the rest with <c>MergeOrAdd</c>.
/// </summary>
/// <remarks>
/// Phase 1: every tokenized vector is offered to <c>TryMergeOrAddSupervised</c>; rejected nodes are queued.
/// Phase 2: queued nodes are retried. After every <c>batchSize</c> attempts (the queue length
/// when phase 2 starts) the queue length is compared with the previous checkpoint and the loop
/// stops when it has not changed.
/// Phase 3: whatever is still queued is inserted with <c>MergeOrAdd</c>.
/// </remarks>
/// <param name="model">Tokenizes the items and is passed on to the merge routines.</param>
/// <param name="data">Items to index.</param>
/// <returns>The root of the new tree.</returns>
public static VectorNode Train<T>(IModel<T> model, params T[] data)
{
    var root = new VectorNode();
    var pending = new Queue<VectorNode>();

    // Phase 1: first attempt for every vector.
    foreach (var item in data)
    {
        foreach (var vector in model.Tokenize(item))
        {
            if (!TryMergeOrAddSupervised(root, new VectorNode(vector), model, out VectorNode rejected))
            {
                pending.Enqueue(rejected);
            }
        }
    }

    // Phase 2: retry until a checkpoint shows no change in the queue length.
    var batchSize = pending.Count;
    var attempts = 0;
    var previousCount = 0;

    while (pending.Count > 0)
    {
        if (!TryMergeOrAddSupervised(root, pending.Dequeue(), model, out VectorNode rejected))
        {
            pending.Enqueue(rejected);
        }

        attempts++;

        if (attempts % batchSize != 0)
        {
            continue;
        }

        if (previousCount == pending.Count)
        {
            break;
        }

        previousCount = pending.Count;
    }

    // Phase 3: insert what is left with MergeOrAdd.
    foreach (var leftover in pending)
    {
        MergeOrAdd(root, leftover, model);
    }

    return root;
}
/// <summary>
/// Appends a textual rendering of the subtree rooted at <paramref name="node"/> to
/// <paramref name="output"/>.
/// </summary>
/// <remarks>
/// Each node is written on its own line as <c>"{node} w:{Weight}"</c>, prefixed by
/// <paramref name="depth"/> tab characters. A node's left subtree follows it one level deeper;
/// its right sibling chain follows at the same level.
/// </remarks>
/// <param name="node">Subtree to render; null renders nothing.</param>
/// <param name="output">Builder that receives the text.</param>
/// <param name="depth">Number of tabs to indent <paramref name="node"/> with.</param>
private static void Visualize(VectorNode node, StringBuilder output, int depth)
{
    // Right siblings share this depth, so they are walked in a loop rather than recursed
    // into; only left children (one level deeper) use the call stack.
    for (var current = node; current != null; current = current.Right)
    {
        output.Append('\t', depth);

        // Append, not AppendFormat: the string is already interpolated, and AppendFormat
        // would treat any '{' or '}' in the node's text as format placeholders.
        output.Append($"{current} w:{current.Weight}");
        output.AppendLine();

        Visualize(current.Left, output, depth + 1);
    }
}
/// <summary>
/// Builds a tree by tokenizing every item in <paramref name="data"/> and inserting each
/// resulting vector with <c>MergeOrAdd</c>.
/// </summary>
/// <param name="model">Tokenizes the items and is passed on to <c>MergeOrAdd</c>.</param>
/// <param name="data">Items to index.</param>
/// <returns>The root of the new tree.</returns>
public static VectorNode CreateTree<T>(IModel<T> model, params T[] data)
{
    var tree = new VectorNode();

    foreach (var document in data)
    {
        var vectors = model.Tokenize(document);

        foreach (var vector in vectors)
        {
            var leaf = new VectorNode(vector);

            MergeOrAdd(tree, leaf, model);
        }
    }

    return tree;
}
/// <summary>
/// Creates a node from its serialized fields, loading its vector from <paramref name="vectorStream"/>.
/// </summary>
/// <param name="vecOffset">Offset passed to <c>VectorOperations.DeserializeVector</c> and stored on the node.</param>
/// <param name="postingsOffset">Postings offset stored on the node.</param>
/// <param name="componentCount">Component count of the vector; narrowed to <see cref="int"/> for the vector read.</param>
/// <param name="weight">Weight stored on the node.</param>
/// <param name="terminator">Terminator value stored on the node.</param>
/// <param name="vectorStream">Stream the vector is read from.</param>
/// <param name="model">Supplies <c>VectorWidth</c> for the vector read.</param>
/// <returns>The reconstructed node.</returns>
public static VectorNode DeserializeNode(
    long vecOffset,
    long postingsOffset,
    long componentCount,
    long weight,
    long terminator,
    Stream vectorStream,
    IVectorSpaceConfig model)
{
    return new VectorNode(
        postingsOffset,
        vecOffset,
        terminator,
        weight,
        VectorOperations.DeserializeVector(vecOffset, (int)componentCount, model.VectorWidth, vectorStream));
}
/// <summary>
/// Creates a node from its serialized fields, loading its vector from <paramref name="vectorStream"/>.
/// </summary>
/// <param name="vecOffset">Offset passed to <c>VectorOperations.DeserializeVector</c> and stored on the node.</param>
/// <param name="postingsOffset">Postings offset stored on the node.</param>
/// <param name="componentCount">Component count of the vector; narrowed to <see cref="int"/> for the vector read.</param>
/// <param name="weight">Weight stored on the node.</param>
/// <param name="terminator">Terminator value stored on the node.</param>
/// <param name="vectorStream">Stream the vector is read from.</param>
/// <param name="model">Supplies <c>NumOfDimensions</c> for the vector read.</param>
/// <returns>The reconstructed node.</returns>
public static VectorNode DeserializeNode(
    long vecOffset,
    long postingsOffset,
    long componentCount,
    long weight,
    long terminator,
    Stream vectorStream,
    IDistanceCalculator model)
{
    return new VectorNode(
        postingsOffset,
        vecOffset,
        terminator,
        weight,
        VectorOperations.DeserializeVector(vecOffset, (int)componentCount, model.NumOfDimensions, vectorStream));
}
/// <summary>
/// Builds a tree by tokenizing every item in <paramref name="data"/> and handing each
/// resulting vector to <paramref name="indexingStrategy"/>.
/// </summary>
/// <param name="model">Tokenizes the items.</param>
/// <param name="indexingStrategy">Its <c>ExecutePut</c> is called with the root and each new node.</param>
/// <param name="data">Items to index.</param>
/// <returns>The root of the new tree.</returns>
public static VectorNode CreateTree<T>(this IModel<T> model, IIndexingStrategy indexingStrategy, params T[] data)
{
    var tree = new VectorNode();

    foreach (var document in data)
    {
        var vectors = model.Tokenize(document);

        foreach (var vector in vectors)
        {
            var leaf = new VectorNode(vector);

            indexingStrategy.ExecutePut<T>(tree, leaf);
        }
    }

    return tree;
}
/// <summary>
/// Walks the tree from <paramref name="root"/> and either stops at a node that scores at least
/// <c>model.IdenticalAngle</c> against <paramref name="node"/>, or attaches <paramref name="node"/>
/// to the first empty child slot on its path.
/// </summary>
/// <remarks>
/// The score at each visited node is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. A score above <c>model.FoldAngle</c> selects the left child,
/// anything else the right child. This method does not merge any payload itself.
/// </remarks>
/// <param name="root">Node to start from.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c>, <c>IdenticalAngle</c> and <c>FoldAngle</c>.</param>
/// <param name="parent">The matching node, or the node <paramref name="node"/> was attached to.</param>
public static void MergeOrAdd(
    VectorNode root,
    VectorNode node,
    IModel model,
    out VectorNode parent)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle >= model.IdenticalAngle)
        {
            parent = current;
            return;
        }

        if (angle > model.FoldAngle)
        {
            if (current.Left == null)
            {
                current.Left = node;
                parent = current;
                return;
            }

            current = current.Left;
        }
        else
        {
            if (current.Right == null)
            {
                current.Right = node;
                parent = current;
                return;
            }

            current = current.Right;
        }
    }
}
/// <summary>
/// Walks the tree from <paramref name="root"/> looking for a node that scores at least
/// <paramref name="identicalAngle"/> against <paramref name="node"/>; if none is found on the
/// path, <paramref name="node"/> is attached to the first empty child slot.
/// </summary>
/// <remarks>
/// The score at each visited node is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. A score above <paramref name="foldAngle"/> selects the left
/// child, anything else the right child.
/// </remarks>
/// <param name="root">Node to start from.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c>.</param>
/// <param name="foldAngle">Score above which the walk goes left.</param>
/// <param name="identicalAngle">Score at or above which an existing node counts as a match.</param>
/// <param name="parent">The matching node, or the node <paramref name="node"/> was attached to.</param>
/// <returns>True when a matching node was found; false when <paramref name="node"/> was attached.</returns>
public static bool TryMerge(
    VectorNode root,
    VectorNode node,
    IDistanceCalculator model,
    double foldAngle,
    double identicalAngle,
    out VectorNode parent)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle >= identicalAngle)
        {
            parent = current;
            return true;
        }

        var goLeft = angle > foldAngle;

        if (goLeft)
        {
            if (current.Left == null)
            {
                current.Left = node;
                parent = current;
                return false;
            }

            current = current.Left;
        }
        else
        {
            if (current.Right == null)
            {
                current.Right = node;
                parent = current;
                return false;
            }

            current = current.Right;
        }
    }
}
/// <summary>
/// Walks the tree from <paramref name="root"/> and either merges the document ids of
/// <paramref name="node"/> into a node that scores at least <c>model.IdenticalAngle</c>, or
/// attaches <paramref name="node"/> to the first empty child slot on its path.
/// </summary>
/// <remarks>
/// The score at each visited node is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. A score above <c>model.FoldAngle</c> selects the left child,
/// anything else the right child.
/// </remarks>
/// <param name="root">Node to start from.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c>, <c>IdenticalAngle</c> and <c>FoldAngle</c>.</param>
/// <exception cref="InvalidOperationException">
/// A node scored at least <c>model.IdenticalAngle</c> but its vector label differs from the
/// label of <paramref name="node"/>'s vector.
/// </exception>
public static void MergeOrAddSupervised(
    this VectorNode root,
    VectorNode node,
    IModel model)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle >= model.IdenticalAngle)
        {
            var sameLabel = current.Vector.Label.Equals(node.Vector.Label);

            if (!sameLabel)
            {
                throw new InvalidOperationException($"IdenticalAngle {model.IdenticalAngle} is too low. Angle was {angle}");
            }

            MergeDocIds(current, node);
            return;
        }

        if (angle > model.FoldAngle)
        {
            if (current.Left == null)
            {
                current.Left = node;
                return;
            }

            current = current.Left;
        }
        else
        {
            if (current.Right == null)
            {
                current.Right = node;
                return;
            }

            current = current.Right;
        }
    }
}
/// <summary>
/// Appends a single line describing <paramref name="node"/> to <paramref name="output"/>.
/// </summary>
/// <remarks>
/// The line is <paramref name="depth"/> tab characters followed by <c>"{node} w:{Weight} "</c>;
/// for a node whose <c>IsRoot</c> is true the result of <c>Size(node)</c> is appended as well.
/// </remarks>
/// <param name="node">Node to describe; null appends nothing.</param>
/// <param name="depth">Number of tabs to indent the line with.</param>
/// <param name="output">Builder that receives the text.</param>
private static void Visualize(VectorNode node, int depth, StringBuilder output)
{
    if (node == null)
    {
        return;
    }

    output.Append('\t', depth);

    // Append, not AppendFormat: the strings are already interpolated, and AppendFormat
    // would treat any '{' or '}' in the rendered text as format placeholders.
    output.Append($"{node} w:{node.Weight} ");

    if (node.IsRoot)
    {
        output.Append($"{Size(node)}");
    }

    output.AppendLine();
}
/// <summary>
/// Attaches <paramref name="node"/> to the first empty child slot on its path through the tree,
/// unless a node on that path scores at least <c>model.IdenticalAngle</c> against it.
/// </summary>
/// <remarks>
/// The score at each visited node is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. A score above <c>model.FoldAngle</c> selects the left child,
/// anything else the right child.
/// </remarks>
/// <param name="root">Node to start from.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c>, <c>IdenticalAngle</c> and <c>FoldAngle</c>.</param>
/// <returns>True when <paramref name="node"/> was attached; false when a matching node already exists.</returns>
public static bool TryAdd(
    this VectorNode root,
    VectorNode node,
    IModel model)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle >= model.IdenticalAngle)
        {
            return false;
        }

        if (angle > model.FoldAngle)
        {
            if (current.Left == null)
            {
                current.Left = node;
                return true;
            }

            current = current.Left;
        }
        else
        {
            if (current.Right == null)
            {
                current.Right = node;
                return true;
            }

            current = current.Right;
        }
    }
}
/// <summary>
/// Persist tree to disk.
/// </summary>
/// <remarks>
/// Nodes are written in pre-order (node, left subtree, right subtree). When
/// <paramref name="node"/> has a component count of 0 the walk starts at its right child.
/// For every visited node the vector is written to <paramref name="vectorStream"/> and the
/// resulting offset is stored in the node's <c>VectorOffset</c> before the node itself is written.
/// </remarks>
/// <param name="node">Tree to persist.</param>
/// <param name="indexStream">Stream to persist the tree into.</param>
/// <param name="vectorStream">Stream to persist vectors in.</param>
/// <param name="postingsStream">
/// Optional stream to persist posting references into. Only used for nodes whose
/// <c>PostingsOffset</c> is -1.
/// </param>
/// <returns>
/// The position of <paramref name="indexStream"/> before writing, and the number of bytes
/// accounted for (<c>VectorNode.BlockSize</c> per written node).
/// </returns>
public static (long offset, long length) SerializeTree(
    VectorNode node,
    Stream indexStream,
    Stream vectorStream,
    Stream postingsStream = null)
{
    var stack = new Stack<VectorNode>();
    var offset = indexStream.Position;

    // Accumulated as long to match the return type; an int would wrap on very large trees.
    long length = 0;

    if (node.ComponentCount == 0)
    {
        node = node.Right;
    }

    while (node != null)
    {
        if (node.PostingsOffset == -1 && postingsStream != null)
        {
            SerializePostings(node, postingsStream);
        }

        // The vector must be written first: the node record stores the vector's offset.
        node.VectorOffset = VectorOperations.SerializeVector(node.Vector, vectorStream);

        SerializeNode(node, indexStream);

        length += VectorNode.BlockSize;

        if (node.Right != null)
        {
            stack.Push(node.Right);
        }

        node = node.Left;

        // Left chain exhausted: resume with the most recently deferred right child.
        if (node == null && stack.Count > 0)
        {
            node = stack.Pop();
        }
    }

    return (offset, length);
}
/// <summary>
/// Walks the chain of right children from <paramref name="root"/> and either merges
/// <paramref name="node"/> into a node with the same label, appends it at the end of the chain,
/// or reports it as unclassified.
/// </summary>
/// <remarks>
/// The score at each visited node is <c>model.CosAngle(node.Vector, cursor.Vector)</c>, or 0 when
/// the visited node has no vector. On the first node scoring above <c>model.FoldAngle</c>:
/// if the vector labels are equal, the document ids are added to that node and its vector is
/// averaged with <paramref name="node"/>'s vector; otherwise <paramref name="node"/> is rejected.
/// Nodes scoring at or below the fold angle are skipped by moving to their right child.
/// </remarks>
/// <param name="root">Node to start from.</param>
/// <param name="node">Node to place.</param>
/// <param name="model">Supplies <c>CosAngle</c> and <c>FoldAngle</c>.</param>
/// <param name="unclassified"><paramref name="node"/> when it was rejected, otherwise null.</param>
/// <returns>True when <paramref name="node"/> was merged or appended; false when it was rejected.</returns>
public static bool TryMergeOrAddSupervised(
    VectorNode root,
    VectorNode node,
    IModel model,
    out VectorNode unclassified)
{
    var current = root;

    while (true)
    {
        var angle = current.Vector == null ? 0 : model.CosAngle(node.Vector, current.Vector);

        if (angle > model.FoldAngle)
        {
            if (!node.Vector.Label.Equals(current.Vector.Label))
            {
                // Close to a node with a different label: the caller decides what to do.
                unclassified = node;
                return false;
            }

            AddDocId(current, node);
            current.Vector.Average(node.Vector);
            unclassified = null;
            return true;
        }

        if (current.Right == null)
        {
            current.Right = node;
            unclassified = null;
            return true;
        }

        current = current.Right;
    }
}
/// <summary>
/// Rebuilds a tree from <paramref name="indexLength"/> bytes of serialized node records read
/// from <paramref name="indexStream"/>.
/// </summary>
/// <remarks>
/// Records are <c>VectorNode.BlockSize</c> bytes each and are read from the current position of
/// <paramref name="indexStream"/>. Each record is turned into a node by <c>DeserializeNode</c>
/// and linked to the previously read node (or to a pending ancestor) according to the new
/// node's <c>Terminator</c> value.
/// </remarks>
/// <param name="indexStream">Stream holding the node records.</param>
/// <param name="vectorStream">Stream passed to <c>DeserializeNode</c> for loading vectors.</param>
/// <param name="indexLength">Number of bytes of node records to consume.</param>
/// <param name="model">Passed to <c>DeserializeNode</c>.</param>
/// <returns>A new, empty root node with the deserialized nodes attached beneath it.</returns>
/// <exception cref="EndOfStreamException">
/// <paramref name="indexStream"/> ended before a complete record could be read.
/// </exception>
public static VectorNode DeserializeTree(Stream indexStream, Stream vectorStream, long indexLength, IModel model)
{
    VectorNode root = new VectorNode();
    VectorNode cursor = root;
    var tail = new Stack<VectorNode>();

    // long, because it is compared against a long length and must not wrap.
    long read = 0;
    var buf = new byte[VectorNode.BlockSize];

    while (read < indexLength)
    {
        // Stream.Read may return fewer bytes than requested, so keep reading until the
        // record is complete instead of deserializing a partially filled buffer.
        var filled = 0;

        while (filled < buf.Length)
        {
            var count = indexStream.Read(buf, filled, buf.Length - filled);

            if (count == 0)
            {
                throw new EndOfStreamException(
                    $"Index stream ended after {read + filled} of {indexLength} expected bytes.");
            }

            filled += count;
        }

        var node = DeserializeNode(buf, vectorStream, model);

        if (node.Terminator == 0) // there is both a left and a right child
        {
            cursor.Left = node;
            tail.Push(cursor);
        }
        else if (node.Terminator == 1) // there is a left but no right child
        {
            cursor.Left = node;
        }
        else if (node.Terminator == 2) // there is a right but no left child
        {
            cursor.Right = node;
        }
        else // there are no children
        {
            if (tail.Count > 0)
            {
                tail.Pop().Right = node;
            }
        }

        cursor = node;
        read += VectorNode.BlockSize;
    }

    return root;
}