/// <summary>
/// This does a new SOM for one node (sort of like recursing on a node)
/// </summary>
/// <param name="index">The node to break apart (index into result.Nodes / result.InputsByNode)</param>
/// <param name="result">The result that contains the node</param>
/// <param name="minNodeItemsForSplit">Nodes with fewer inputs than this are left alone</param>
/// <param name="maxSpreadPercent">Nodes whose spread divided by totalSpread is below this are left alone</param>
/// <param name="totalSpread">The spread that this node's spread is compared against</param>
/// <param name="rules">Rules used for the sub SOM (NumNodes is the number of nodes to split into)</param>
/// <returns>
/// Item1: True if the node was split, false if it was left alone
/// Item2: The nodes that replace the original (just the original node if it wasn't split)
/// </returns>
private static Tuple<bool, SOMNode[]> SplitNode(int index, SOMResult result, int minNodeItemsForSplit, double maxSpreadPercent, double totalSpread, SOMRules rules)
{
    ISOMInput[] inputs = result.InputsByNode[index];

    // Don't split if there aren't enough inputs in the parent
    if (inputs.Length < minNodeItemsForSplit)
    {
        return Tuple.Create(false, new[] { result.Nodes[index] });
    }

    // See how this node's distances from the average compare with the total
    double nodeSpread = GetTotalSpread(inputs.Select(o => o.Weights));
    double percentSpread = nodeSpread / totalSpread;

    // 0/0 yields NaN, and every comparison against NaN is false.  Without the explicit check, a node with
    // no measurable spread would fall through and get split on every pass
    if (double.IsNaN(percentSpread) || percentSpread < maxSpreadPercent)
    {
        return Tuple.Create(false, new[] { result.Nodes[index] });
    }

    // Get random node weights.  Don't let any of those weights be closer to other nodes than this node
    VectorND[] weights = GetRandomWeights_InsideCell(rules.NumNodes, inputs, result.Nodes, index);

    SOMNode[] nodes = Enumerable.Range(0, rules.NumNodes).
        Select(o => new SOMNode() { Weights = weights[o] }).
        ToArray();

    // Split up this node
    SOMResult subResult = TrainSOM(nodes, inputs, rules, false);

    return Tuple.Create(true, subResult.Nodes);
}
/// <summary>
/// Fills the border with a canvas of solid colored blobs, one per node, with areas proportional to the number
/// of items contained.  When the user mouses over a blob, the caller can show examples of the items as tooltips
/// </summary>
/// <param name="border">The canvas is sized to this border's actual size minus padding, then set as its child</param>
/// <param name="result">Nodes must have 2D positions (only verified in debug builds)</param>
/// <param name="getNodeColor">Called once per node to get that node's blob color</param>
/// <param name="events">Optional handlers that are passed along to the drawing method</param>
public static void ShowResults2D_Blobs(Border border, SOMResult result, Func<SOMNode, Color> getNodeColor, BlobEvents events = null)
{
#if DEBUG
    if (result.Nodes.Any(o => o.Position.Size != 2))
    {
        throw new ArgumentException("Node positions need to be 2D");
    }
#endif

    // Voronoi cells around each node's 2D position
    Point[] nodePoints = result.Nodes.
        Select(o => new Point(o.Position[0], o.Position[1])).
        ToArray();

    VoronoiResult2D cells = Math2D.CapVoronoiCircle(Math2D.GetVoronoi(nodePoints, true));

    Color[] nodeColors = result.Nodes.
        Select(getNodeColor).
        ToArray();

    // Drawable area is the border's interior
    double width = border.ActualWidth - border.Padding.Left - border.Padding.Right;
    double height = border.ActualHeight - border.Padding.Top - border.Padding.Bottom;

    border.Child = DrawVoronoi_Blobs(cells, nodeColors, result.Nodes, result.InputsByNode, width.ToInt_Floor(), height.ToInt_Floor(), events);
}
/// <summary>
/// Merges nodes whose weights are nearly identical, then moves the node positions so that the distances
/// between positions try to match the distances between weights
/// </summary>
/// <remarks>
/// InputsByNode is passed through untouched by the position pass (those are high dimension)
/// </remarks>
/// <returns>A new result containing the repositioned nodes</returns>
public static SOMResult ArrangeNodes_LikesAttract(SOMResult result)
{
    VectorND[] WeightsOf(SOMResult source) => source.Nodes.
        Select(o => o.Weights).
        ToArray();

    // High dimension distances between every pair of nodes
    var targetDistances = MathND.GetDistancesBetween(WeightsOf(result));

    if (MergeTouchingNodes(ref result, targetDistances))
    {
        // Nodes were merged, so the distances need to be rebuilt against the reduced set
        targetDistances = MathND.GetDistancesBetween(WeightsOf(result));
    }

    // Pull the low dimension positions toward the high dimension distances
    SOMNode[] arranged = MoveNodes_BallOfSprings(result.Nodes, targetDistances, 1500);

    return new SOMResult(arranged, result.InputsByNode, result.IncludesEmptyNodes);
}
/// <summary>
/// Clusters the inputs with K-Means, then assigns and arranges node positions the same way the SOM overloads do
/// </summary>
/// <remarks>
/// https://en.wikipedia.org/wiki/K-means_clustering
///
/// K-Means is a simpler, very different algorithm than SOM, but the inputs and outputs look the same and the
/// goal is very similar, so it lives in this class.  SOM will make better quality clusters (but could fail and
/// just make 1 or 2 massive clusters); K-Means is a good way of guaranteeing a certain number of clusters
/// </remarks>
/// <param name="isDisplay2D">Currently not used (positions are always injected as 2D)</param>
public static SOMResult TrainKMeans(ISOMInput[] inputs, int numNodes, bool isDisplay2D)
{
    SOMResult clustered = TrainKMeans(numNodes, inputs);

    //TODO: Look at isDisplay2D
    InjectNodePositions2D(clustered.Nodes);

    return ArrangeNodes_LikesAttract(clustered);
}
/// <summary>
/// Trains clones of the nodes passed in against the inputs, then assigns each input to a node
/// </summary>
/// <param name="nodes">Starting nodes.  These are cloned, the originals aren't modified by this method</param>
/// <param name="inputs">Items to train against.  If empty, no training passes run</param>
/// <param name="rules">Supplies NumIterations, InitialRadiusPercent, LearningRate, AttractionFunction</param>
/// <param name="returnEmptyNodes">False removes nodes that ended up with no inputs</param>
private static SOMResult TrainSOM(SOMNode[] nodes, ISOMInput[] inputs, SOMRules rules, bool returnEmptyNodes = false)
{
    double mapRadius = MathND.GetRadius(MathND.GetAABB(nodes.Select(o => o.Weights)));

    SOMNode[] returnNodes = nodes.
        Select(o => o.Clone()).
        ToArray();

    // NOTE(review): Math.Log(mapRadius) is zero or negative when mapRadius <= 1, which stops the search radius
    // below from shrinking over time -- confirm that weights are scaled so the radius stays above 1
    double timeConstant = rules.NumIterations / Math.Log(mapRadius);

    int iteration = 0;
    int remainingIterations = rules.NumIterations;

    // The inputs.Length check is needed because remainingIterations only shrinks by inputs.Length per pass.  With
    // no inputs it would never reach zero, and the loop would never exit
    while (remainingIterations > 0 && inputs.Length > 0)
    {
        foreach (ISOMInput input in UtilityCore.RandomOrder(inputs, Math.Min(remainingIterations, inputs.Length)))
        {
            // Find closest node
            SOMNode closest = GetClosest(returnNodes, input).Item1;

            // Find other affected nodes (a node and distance squared)
            double searchRadius = mapRadius * rules.InitialRadiusPercent * Math.Exp(-iteration / timeConstant);
            Tuple<SOMNode, double>[] neighbors = GetNeighbors(returnNodes, closest, searchRadius);

            double learningRate = rules.LearningRate * Math.Exp(-(double)iteration / (double)rules.NumIterations);

            // Adjust the matched node (full learning rate)
            AdjustNodeWeights(closest, input.Weights, learningRate);

            foreach (var node in neighbors)
            {
                double influence = GetInfluence(rules.AttractionFunction, node.Item2, searchRadius);

                // Adjust a neighbor
                AdjustNodeWeights(node.Item1, input.Weights, learningRate * influence);
            }

            iteration++;
        }

        remainingIterations -= inputs.Length;
    }

    // See which inputs go with which nodes
    ISOMInput[][] inputsByNode = GetInputsByNode(returnNodes, inputs);

    SOMResult retVal = new SOMResult(returnNodes, inputsByNode, true);

    if (!returnEmptyNodes)
    {
        retVal = RemoveZeroNodes(retVal);
    }

    return retVal;
}
/// <summary>
/// This version starts with a SOM, then potentially splits the largest node and/or gathers the smallest nodes into a single
/// </summary>
/// <param name="inputs">The items to cluster</param>
/// <param name="rules">Rules for the initial SOM</param>
/// <param name="isDisplay2D">Passed along to the SOM and K-Means trainers</param>
/// <returns>
/// The SOM result as is when it has no nodes, or has one node and fewer than 20 inputs.  A 5 node K-Means result when
/// the SOM produced one node from 20 or more inputs.  Otherwise a result rebuilt from the categorized nodes (k-means split
/// nodes, kept nodes, and at most one node that merges the remaining nodes)
/// </returns>
public static SOMResult Train(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D)
{
    const int MinInputsForKMeans = 20;
    const int KMeansNodeCount = 5;

    SOMResult result = SelfOrganizingMaps.TrainSOM(inputs, rules, isDisplay2D);

    if (result.Nodes.Length == 0)
    {
        return result;
    }
    else if (result.Nodes.Length == 1)
    {
        // The SOM collapsed everything into one node.  Fall back to K-Means if there are enough inputs to be worth it
        if (inputs.Length < MinInputsForKMeans)
        {
            return result;
        }

        // Forward the caller's display preference instead of hard coding 2D
        return SelfOrganizingMaps.TrainKMeans(inputs, KMeansNodeCount, isDisplay2D);
    }

    var categorized = GetSOM_SplitMerge_Categorize(result);

    List<SOMNode> nodes = new List<SOMNode>();
    List<ISOMInput[]> newInputs = new List<ISOMInput[]>();

    foreach (NodeCombo set in UtilityCore.Iterate(categorized.kmeans, categorized.keep))      // UtilityCore.Iterate gracefully skips nulls
    {
        nodes.Add(set.Node);
        newInputs.Add(set.Inputs);
    }

    if (categorized.remaining != null)
    {
        // Everything in remaining becomes one node that sits at the center of the nodes it replaces
        nodes.Add(new SOMNode()
        {
            Position = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Position)),
            Weights = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Weights)),
        });

        newInputs.Add(categorized.remaining.
            SelectMany(o => o.Inputs).
            ToArray());
    }

    return new SOMResult(nodes.ToArray(), newInputs.ToArray(), false);
}
/// <summary>
/// This overload does an initial training, then repeatedly splits any node that has too wide of a range of values
/// </summary>
/// <remarks>
/// This method is a bit of a failure.  Sometimes it works, but other times it just runs without fixing anything
/// </remarks>
/// <param name="maxSpreadPercent">
/// Spread is an input's distance from the center of all inputs.  The percent is a node's max distance divided by all node's max distance.
/// .65 to .75 is a good value to use (smaller values will chop up into more nodes)
/// </param>
/// <param name="isDisplay2D">Passed to the initial training.  Final positions are currently always injected as 2D</param>
/// <param name="returnEmptyNodes">Passed to the initial training.  Empty nodes are always removed after a split pass</param>
public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, double maxSpreadPercent, bool isDisplay2D, bool returnEmptyNodes = false)
{
    const int MinInputsForSplit = 4;
    const int MaxSplitPasses = 50;      // if it exceeds this, just use whatever is there

    // Initial training
    SOMResult result = TrainSOM(inputs, rules, isDisplay2D, returnEmptyNodes);

    double totalSpread = GetTotalSpread(inputs.Select(o => o.Weights));

    for (int pass = 0; pass < MaxSplitPasses; pass++)
    {
        // Try to split each node that has too much variation (input's distance from average)
        Tuple<bool, SOMNode[]>[] splits = Enumerable.Range(0, result.Nodes.Length).
            AsParallel().
            Select(o => SplitNode(o, result, MinInputsForSplit, maxSpreadPercent, totalSpread, rules)).
            ToArray();

        if (!splits.Any(o => o.Item1))
        {
            // Nothing was split this pass, so there's nothing left to do
            break;
        }

        SOMNode[] splitNodes = splits.
            SelectMany(o => o.Item2).
            ToArray();

        // Reassign every input against the new set of nodes, and throw out nodes that didn't get any
        ISOMInput[][] inputsByNode = SelfOrganizingMaps.GetInputsByNode(splitNodes, inputs);
        result = SelfOrganizingMaps.RemoveZeroNodes(new SOMResult(splitNodes, inputsByNode, false));
    }

    //TODO: Look at isDisplay2D
    InjectNodePositions2D(result.Nodes);

    return ArrangeNodes_LikesAttract(result);
}
/// <summary>
/// Builds a new result that only contains the nodes that have at least one input
/// </summary>
/// <returns>A result with the surviving nodes in their original order (IncludesEmptyNodes is passed as false)</returns>
private static SOMResult RemoveZeroNodes(SOMResult result)
{
    // Indices of the nodes that have something assigned to them
    int[] occupied = Enumerable.Range(0, result.Nodes.Length).
        Where(i => result.InputsByNode[i].Length != 0).
        ToArray();

    SOMNode[] keptNodes = occupied.
        Select(i => result.Nodes[i]).
        ToArray();

    ISOMInput[][] keptInputs = occupied.
        Select(i => result.InputsByNode[i]).
        ToArray();

    return new SOMResult(keptNodes, keptInputs, false);
}
/// <summary>
/// Creates nodes with random weights based on the input's weights and trains them.  After training, it injects
/// positions into the nodes and arranges those positions so similar sets are near each other
/// </summary>
/// <param name="inputs">These are items turned into vectors.  They could be images, db row hashes, whatever</param>
/// <param name="rules">NumNodes is the number of starting nodes.  The rest is used by the training pass</param>
/// <param name="isDisplay2D">Meant to choose node.Position dimensions (true is 2D, false is 3D).  Currently not used, positions are always 2D</param>
/// <param name="returnEmptyNodes">This shouldn't even be an option.  Empty nodes are just artifacts that pollute the final result</param>
public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D, bool returnEmptyNodes = false)
{
    // One untrained node per random weight
    SOMNode[] startNodes = GetRandomNodeWeights(rules.NumNodes, inputs).
        Select(o => new SOMNode() { Weights = o }).
        ToArray();

    SOMResult trained = TrainSOM(startNodes, inputs, rules, returnEmptyNodes);

    //TODO: Look at isDisplay2D
    InjectNodePositions2D(trained.Nodes);

    return ArrangeNodes_LikesAttract(trained);
}
/// <summary>
/// Divides the border up into a voronoi (one cell per node), then each cell is tiled with examples
/// </summary>
/// <param name="border">The canvas is sized to this border's actual size minus padding, then set as its child</param>
/// <param name="result">Only the first two coordinates of each node's position are used</param>
/// <param name="tileWidth">Passed along to the drawing method</param>
/// <param name="tileHeight">Passed along to the drawing method</param>
/// <param name="drawTile">Passed along to the drawing method</param>
/// <param name="events">Optional handlers that are passed along to the drawing method</param>
public static void ShowResults2D_Tiled(Border border, SOMResult result, int tileWidth, int tileHeight, Action<DrawTileArgs> drawTile, BlobEvents events = null)
{
    //TODO: Take a func that will render the input onto a writable bitmap, or something dynamic but efficient?

    Point[] nodePoints = result.Nodes.
        Select(o => new Point(o.Position[0], o.Position[1])).
        ToArray();

    // Drawable area is the border's interior
    double width = border.ActualWidth - border.Padding.Left - border.Padding.Right;
    double height = border.ActualHeight - border.Padding.Top - border.Padding.Bottom;

    //TODO: Implement a rectangle cap (aspect ratio 1) and use it instead of the circle
    VoronoiResult2D cells = Math2D.CapVoronoiCircle(Math2D.GetVoronoi(nodePoints, true));

    border.Child = DrawVoronoi_Tiled(cells, result.Nodes, result.InputsByNode, width.ToInt_Floor(), height.ToInt_Floor(), tileWidth, tileHeight, drawTile, events);
}
/// <summary>
/// Core K-Means loop: adjusts the centers, reassigns inputs to nodes, and repeats until the assignments stop changing
/// </summary>
/// <returns>The final nodes and their inputs, with nodes that have no inputs removed</returns>
private static SOMResult TrainKMeans(int numNodes, ISOMInput[] inputs)
{
    SOMNode[] centers;
    ISOMInput[][] assigned;
    GetInitialKMeansNodes(out centers, out assigned, numNodes, inputs);

    bool isStable = false;
    while (!isStable)
    {
        AdjustKMeansCenters(centers, assigned);

        ISOMInput[][] reassigned = GetInputsByNode(centers, inputs);

        isStable = IsSame(assigned, reassigned);
        if (!isStable)
        {
            assigned = reassigned;
        }
    }

    //NOTE: The only time empty nodes should occur is if there are duplicate inputs
    return RemoveZeroNodes(new SOMResult(centers, assigned, true));
}
/// <summary>
/// Sorts the nodes by number of inputs (largest first), then puts them into three buckets
/// </summary>
/// <remarks>
/// Reads the two largest nodes unconditionally, so the result needs to contain at least two nodes
/// </remarks>
/// <returns>
/// kmeans: The largest node broken up by a 4 node K-Means, if it holds more than 10 times the inputs of the second largest (otherwise null)
/// keep: The nodes to leave alone
/// remaining: The nodes that should be merged into a single node (null if there aren't any)
/// </returns>
private static (NodeCombo[] kmeans, NodeCombo[] keep, NodeCombo[] remaining) GetSOM_SplitMerge_Categorize(SOMResult result)
{
    double Ratio(NodeCombo larger, NodeCombo smaller) => larger.Inputs.Length.ToDouble() / smaller.Inputs.Length.ToDouble();

    NodeCombo[] ToCombos(SOMResult source) => Enumerable.Range(0, source.Nodes.Length).
        Select(o => new NodeCombo() { Node = source.Nodes[o], Inputs = source.InputsByNode[o] }).
        ToArray();

    NodeCombo[] ordered = ToCombos(result).
        OrderByDescending(o => o.Inputs.Length).
        ToArray();

    // The largest node gets split when it dwarfs the runner up
    bool splitLargest = Ratio(ordered[0], ordered[1]) > 10;

    NodeCombo[] kmeansSplit = null;
    if (splitLargest)
    {
        kmeansSplit = ToCombos(SelfOrganizingMaps.TrainKMeans(ordered[0].Inputs, 4, true));
    }

    // Keep nodes until one is dwarfed by the first kept node
    int cursor = splitLargest ? 1 : 0;
    NodeCombo largestKept = ordered[cursor];

    var kept = new List<NodeCombo>();
    kept.Add(largestKept);
    cursor++;

    while (cursor < ordered.Length && !(Ratio(largestKept, ordered[cursor]) > 10))
    {
        kept.Add(ordered[cursor]);
        cursor++;
    }

    // Whatever wasn't reached by the loop above gets merged into a single node
    NodeCombo[] leftovers = null;
    if (cursor < ordered.Length)
    {
        leftovers = ordered.
            Skip(cursor).
            ToArray();
    }

    if (leftovers == null && kept.Count > 0 && splitLargest)
    {
        // If even the smallest k-means piece dwarfs all the kept nodes combined, merge the kept nodes instead
        int keptTotal = kept.Sum(o => o.Inputs.Length);
        int smallestPiece = kmeansSplit.Min(o => o.Inputs.Length);

        if (smallestPiece.ToDouble() / keptTotal.ToDouble() > 10)
        {
            leftovers = kept.ToArray();
            kept.Clear();
        }
    }

    return (kmeansSplit, kept.ToArray(), leftovers);
}
/// <summary>
/// If two nodes are too close to each other, they get merged into one
/// </summary>
/// <param name="result">Replaced with the merged result when this returns true, otherwise left alone</param>
/// <param name="distances">Item1 and Item2 are indices into result.Nodes, Item3 is the distance between those two nodes</param>
/// <param name="minDist">Pairs with a distance less than this are considered touching</param>
/// <returns>True if any nodes were merged</returns>
private static bool MergeTouchingNodes(ref SOMResult result, Tuple<int, int, double>[] distances, double minDist = .01)
{
    // Find touching
    var touching = distances.
        Where(o => o.Item3 < minDist).
        ToArray();

    if (touching.Length == 0)
    {
        return false;
    }

    #region Merge key pairs

    // There could be several pairs that need to be joined.  ex:
    //      {0,2} {0,3} {2,5} -> {0,2,3,5}
    //      {1,6} -> {1,6}
    //
    // A pair can also bridge two sets that already exist.  ex:
    //      {0,3} {1,2} {2,3} -> {0,3,1,2}
    // Those two sets must be combined, otherwise an index ends up in both and its inputs get duplicated

    List<List<int>> sets = new List<List<int>>();

    foreach (var pair in touching)
    {
        List<int> set1 = sets.FirstOrDefault(o => o.Contains(pair.Item1));
        List<int> set2 = sets.FirstOrDefault(o => o.Contains(pair.Item2));

        if (set1 == null && set2 == null)
        {
            List<int> newSet = new List<int>();
            newSet.Add(pair.Item1);
            if (pair.Item2 != pair.Item1)
            {
                newSet.Add(pair.Item2);
            }

            sets.Add(newSet);
        }
        else if (set1 == null)
        {
            set2.Add(pair.Item1);
        }
        else if (set2 == null)
        {
            set1.Add(pair.Item2);
        }
        else if (!object.ReferenceEquals(set1, set2))
        {
            // Fold the later set into the earlier one so the earlier set's first node stays the representative
            bool isSet1Earlier = sets.IndexOf(set1) < sets.IndexOf(set2);
            List<int> target = isSet1Earlier ? set1 : set2;
            List<int> absorbed = isSet1Earlier ? set2 : set1;

            target.AddRange(absorbed);
            sets.Remove(absorbed);
        }
    }

    #endregion
    #region Singular sets

    // Identify stand alone nodes, and add their index to the sets list (makes the next section easier to implement)

    for (int cntr = 0; cntr < result.Nodes.Length; cntr++)
    {
        if (!sets.Any(o => o.Contains(cntr)))
        {
            List<int> singleSet = new List<int>();
            singleSet.Add(cntr);
            sets.Add(singleSet);
        }
    }

    #endregion
    #region Merge nodes

    List<SOMNode> newNodes = new List<SOMNode>();
    List<ISOMInput[]> newImagesByNode = new List<ISOMInput[]>();

    foreach (List<int> set in sets)
    {
        // Just use the first node (no need to take the average of weights since they're nearly identical, and taking the average position
        // doesn't add any value - later methods will move the node positions around anyway)
        newNodes.Add(result.Nodes[set[0]]);

        if (set.Count == 1)
        {
            newImagesByNode.Add(result.InputsByNode[set[0]]);
        }
        else
        {
            List<ISOMInput> mergedInputs = new List<ISOMInput>();

            foreach (int index in set)
            {
                mergedInputs.AddRange(result.InputsByNode[index]);
            }

            newImagesByNode.Add(mergedInputs.ToArray());
        }
    }

    #endregion

    result = new SOMResult(newNodes.ToArray(), newImagesByNode.ToArray(), result.IncludesEmptyNodes);
    return true;
}