/// <summary>
/// Core training loop.  Clones the supplied nodes, then for each iteration picks an input, finds the closest node,
/// and pulls that node (and its neighbors within a shrinking search radius) toward the input's weights.  After training,
/// the inputs are grouped by node and returned as a <see cref="SOMResult"/>
/// </summary>
/// <param name="nodes">Starting nodes.  Training runs against clones of these (presumably leaving the originals untouched -- confirm against SOMNode.Clone)</param>
/// <param name="inputs">The items to train against.  If this is empty, no training iterations run</param>
/// <param name="rules">Supplies NumIterations, InitialRadiusPercent, LearningRate and AttractionFunction</param>
/// <param name="returnEmptyNodes">When false, the result is passed through RemoveZeroNodes before being returned</param>
private static SOMResult TrainSOM(SOMNode[] nodes, ISOMInput[] inputs, SOMRules rules, bool returnEmptyNodes = false)
{
    double mapRadius = MathND.GetRadius(MathND.GetAABB(nodes.Select(o => o.Weights)));

    SOMNode[] returnNodes = nodes.
        Select(o => o.Clone()).
        ToArray();

    // NOTE(review): Math.Log(mapRadius) is zero when mapRadius is 1 and negative when it is below 1, which makes
    // timeConstant infinite/negative and stops the search radius from decaying.  Left as is because it is unclear
    // whether weights are ever that tightly packed -- confirm against callers
    double timeConstant = rules.NumIterations / Math.Log(mapRadius);

    int iteration = 0;
    int remainingIterations = rules.NumIterations;

    // The inputs.Length check is required for termination: with zero inputs the inner loop does nothing and
    // remainingIterations would be decremented by zero forever
    while (remainingIterations > 0 && inputs.Length > 0)
    {
        // Each pass visits at most inputs.Length inputs in random order (fewer on the final pass)
        foreach (ISOMInput input in UtilityCore.RandomOrder(inputs, Math.Min(remainingIterations, inputs.Length)))
        {
            // Find closest node
            SOMNode closest = GetClosest(returnNodes, input).Item1;

            // Find other affected nodes (a node and distance squared)
            double searchRadius = mapRadius * rules.InitialRadiusPercent * Math.Exp(-iteration / timeConstant);
            Tuple<SOMNode, double>[] neighbors = GetNeighbors(returnNodes, closest, searchRadius);

            // The learning rate decays exponentially over the configured number of iterations
            double learningRate = rules.LearningRate * Math.Exp(-(double)iteration / (double)rules.NumIterations);

            // Adjust the matched node (full learning rate)
            AdjustNodeWeights(closest, input.Weights, learningRate);

            foreach (var node in neighbors)
            {
                double influence = GetInfluence(rules.AttractionFunction, node.Item2, searchRadius);

                // Adjust a neighbor (learning rate scaled by its influence)
                AdjustNodeWeights(node.Item1, input.Weights, learningRate * influence);
            }

            iteration++;
        }

        remainingIterations -= inputs.Length;
    }

    // See which inputs go with which nodes
    ISOMInput[][] inputsByNode = GetInputsByNode(returnNodes, inputs);

    SOMResult retVal = new SOMResult(returnNodes, inputsByNode, true);

    if (!returnEmptyNodes)
    {
        retVal = RemoveZeroNodes(retVal);
    }

    return retVal;
}
/// <summary>
/// Trains a SOM, then post-processes the outcome:
///  - zero nodes: the SOM result is returned untouched
///  - one node: returned untouched when there are fewer than 20 inputs, otherwise replaced by a k-means run
///  - otherwise: the nodes are categorized (kmeans / keep / remaining).  The kmeans and keep sets are carried over
///    as they are, and the remaining nodes (if any) are merged into a single node
/// </summary>
/// <param name="inputs">The items to organize</param>
/// <param name="rules">Training rules, forwarded to TrainSOM</param>
/// <param name="isDisplay2D">Forwarded to TrainSOM</param>
/// <returns>The SOM result, the k-means result, or a rebuilt result containing the carried-over and merged nodes</returns>
public static SOMResult Train(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D)
{
    SOMResult som = SelfOrganizingMaps.TrainSOM(inputs, rules, isDisplay2D);

    int nodeCount = som.Nodes.Length;

    if (nodeCount == 0)
    {
        return som;
    }

    if (nodeCount == 1)
    {
        // kmeans single node: only worth trying when there are enough inputs
        if (inputs.Length < 20)
        {
            return som;
        }

        return SelfOrganizingMaps.TrainKMeans(inputs, 5, true);
    }

    var buckets = GetSOM_SplitMerge_Categorize(som);

    List<SOMNode> finalNodes = new List<SOMNode>();
    List<ISOMInput[]> finalInputs = new List<ISOMInput[]>();

    // UtilityCore.Iterate gracefully skips nulls
    foreach (NodeCombo combo in UtilityCore.Iterate(buckets.kmeans, buckets.keep))
    {
        finalNodes.Add(combo.Node);
        finalInputs.Add(combo.Inputs);
    }

    var leftovers = buckets.remaining;

    if (leftovers != null)
    {
        // Collapse all the leftover nodes into one node sitting at their center, owning all of their inputs
        SOMNode merged = new SOMNode()
        {
            Position = MathND.GetCenter(leftovers.Select(o => o.Node.Position)),
            Weights = MathND.GetCenter(leftovers.Select(o => o.Node.Weights)),
        };

        ISOMInput[] mergedInputs = leftovers.
            SelectMany(o => o.Inputs).
            ToArray();

        finalNodes.Add(merged);
        finalInputs.Add(mergedInputs);
    }

    return new SOMResult(finalNodes.ToArray(), finalInputs.ToArray(), false);
}
/// <summary>
/// Runs the standard training, then repeatedly hands every node to SplitNode so that nodes whose inputs vary too
/// much get re-trained into several nodes.  Stops when a pass changes nothing, or after 50 passes
/// </summary>
/// <remarks>
/// Per the original author, this method is a bit of a failure.  Sometimes it works, but other times it just runs
/// without fixing anything
/// </remarks>
/// <param name="maxSpreadPercent">
/// Spread is an input's distance from the center of all inputs.  The percent is a node's max distance divided by all node's max distance.
/// .65 to .75 is a good value to use (smaller values will chop up into more nodes)
/// </param>
public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, double maxSpreadPercent, bool isDisplay2D, bool returnEmptyNodes = false)
{
    const int minInputsForSplit = 4;
    const int maxPasses = 50;       // if it exceeds this, just use whatever is there

    // Initial training
    SOMResult current = TrainSOM(inputs, rules, isDisplay2D, returnEmptyNodes);

    // Divide large nodes
    double overallSpread = GetTotalSpread(inputs.Select(o => o.Weights));

    for (int pass = 0; pass < maxPasses; pass++)
    {
        // Split up nodes that have too much variation (input's distance from average)
        var splits = Enumerable.Range(0, current.Nodes.Length).
            AsParallel().
            Select(index => SplitNode(index, current, minInputsForSplit, maxSpreadPercent, overallSpread, rules)).
            ToArray();

        if (!splits.Any(o => o.Item1))
        {
            // Nothing was split this pass, so the result is stable
            break;
        }

        SOMNode[] splitNodes = splits.
            SelectMany(o => o.Item2).
            ToArray();

        // Reassign every input to the new set of nodes, then drop nodes that ended up with nothing
        ISOMInput[][] inputsByNode = SelfOrganizingMaps.GetInputsByNode(splitNodes, inputs);
        current = SelfOrganizingMaps.RemoveZeroNodes(new SOMResult(splitNodes, inputsByNode, false));
    }

    // Inject positions into the nodes
    InjectNodePositions2D(current.Nodes);       //TODO: Look at isDisplay2D

    return ArrangeNodes_LikesAttract(current);
}
/// <summary>
/// Builds rules.NumNodes nodes with random weights derived from the inputs, trains them, then gives the resulting
/// nodes positions and arranges those positions so that similar nodes sit near each other
/// </summary>
/// <param name="inputs">These are items turned into vectors.  They could be images, db row hashes, whatever</param>
/// <param name="rules">Training rules (node count, iterations, etc)</param>
/// <param name="isDisplay2D">Currently not read by this method (see the TODO below); positions are always injected by the 2D routine</param>
/// <param name="returnEmptyNodes">This shouldn't even be an option.  Empty nodes are just artifacts that pollute the final result</param>
public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D, bool returnEmptyNodes = false)
{
    VectorND[] seedWeights = GetRandomNodeWeights(rules.NumNodes, inputs);

    // One untrained node per random weight vector
    SOMNode[] seedNodes = Array.ConvertAll(seedWeights, weight => new SOMNode() { Weights = weight });

    SOMResult trained = TrainSOM(seedNodes, inputs, rules, returnEmptyNodes);

    // Inject positions into the nodes
    InjectNodePositions2D(trained.Nodes);       //TODO: Look at isDisplay2D

    return ArrangeNodes_LikesAttract(trained);
}
/// <summary>
/// This does a new SOM for one node (sort of like recursing on a node)
/// </summary>
/// <param name="index">The node to break apart</param>
/// <param name="result">The current result; supplies the node and the inputs assigned to it</param>
/// <param name="minNodeItemsForSplit">Nodes with fewer inputs than this are never split</param>
/// <param name="maxSpreadPercent">The node is only split when its spread divided by totalSpread is at least this value</param>
/// <param name="totalSpread">The spread across all inputs (the denominator of the percent)</param>
/// <param name="rules">Rules for the sub training (rules.NumNodes new nodes are created)</param>
/// <returns>
/// Item1: true if the node was split, false if it was left alone.
/// Item2: the replacement nodes (just the original node when Item1 is false)
/// </returns>
private static Tuple<bool, SOMNode[]> SplitNode(int index, SOMResult result, int minNodeItemsForSplit, double maxSpreadPercent, double totalSpread, SOMRules rules)
{
    ISOMInput[] inputs = result.InputsByNode[index];

    // Don't split if there aren't enough inputs in the parent
    if (inputs.Length < minNodeItemsForSplit)
    {
        return Tuple.Create(false, new[] { result.Nodes[index] });
    }

    // See how this node's distances from the average compare with the total
    double nodeSpread = GetTotalSpread(inputs.Select(o => o.Weights));
    double percentSpread = nodeSpread / totalSpread;

    // When both spreads are zero (all inputs identical) the division yields NaN.  Every comparison against NaN is
    // false, so without the explicit check a zero-spread node would fall through, get split, and report a change
    // on every pass
    if (double.IsNaN(percentSpread) || percentSpread < maxSpreadPercent)
    {
        return Tuple.Create(false, new[] { result.Nodes[index] });
    }

    // Get random node weights.  Don't let any of those weights be closer to other nodes than this node
    VectorND[] weights = GetRandomWeights_InsideCell(rules.NumNodes, inputs, result.Nodes, index);

    SOMNode[] nodes = Enumerable.Range(0, rules.NumNodes).
        Select(o => new SOMNode() { Weights = weights[o] }).
        ToArray();

    // Split up this node (empty nodes are dropped from the sub result)
    SOMResult subResult = TrainSOM(nodes, inputs, rules, false);

    return Tuple.Create(true, subResult.Nodes);
}