Esempio n. 1
0
        /// <summary>
        /// Runs a new SOM over the inputs of a single node, replacing that node with several (sort of like recursing on a node)
        /// </summary>
        /// <remarks>
        /// The node is returned untouched when it holds fewer than <paramref name="minNodeItemsForSplit"/> inputs, or when
        /// its spread divided by <paramref name="totalSpread"/> is below <paramref name="maxSpreadPercent"/>
        /// </remarks>
        /// <param name="index">Index into result.Nodes / result.InputsByNode of the node to break apart</param>
        /// <param name="result">The result that owns the node</param>
        /// <param name="minNodeItemsForSplit">Nodes with fewer inputs than this are never split</param>
        /// <param name="maxSpreadPercent">Nodes whose spread ratio is below this are never split</param>
        /// <param name="totalSpread">Spread of the full input set (the denominator of the ratio)</param>
        /// <param name="rules">Rules for the sub training run (NumNodes decides how many nodes are seeded)</param>
        /// <returns>
        /// Item1 is true when the node was split.  Item2 holds the replacement nodes (just the original node when Item1 is false)
        /// </returns>
        private static Tuple<bool, SOMNode[]> SplitNode(int index, SOMResult result, int minNodeItemsForSplit, double maxSpreadPercent, double totalSpread, SOMRules rules)
        {
            ISOMInput[] inputs = result.InputsByNode[index];

            // Don't split if there aren't enough inputs in the parent
            if (inputs.Length < minNodeItemsForSplit)
            {
                return Tuple.Create(false, new[] { result.Nodes[index] });
            }

            // See how this node's distances from the average compare with the total
            double nodeSpread = GetTotalSpread(inputs.Select(o => o.Weights));
            double percentSpread = nodeSpread / totalSpread;

            // A zero totalSpread turns the ratio into NaN (0/0).  NaN fails every comparison, so without the explicit
            // check the node would fall through and get split even though there is no spread to reduce
            if (double.IsNaN(percentSpread) || percentSpread < maxSpreadPercent)
            {
                return Tuple.Create(false, new[] { result.Nodes[index] });
            }

            // Seed the new nodes with random weights.  Going by the helper's name, the weights are meant to stay inside
            // this node's cell (not closer to any other node than to this one)
            VectorND[] weights = GetRandomWeights_InsideCell(rules.NumNodes, inputs, result.Nodes, index);

            SOMNode[] nodes = Enumerable.Range(0, rules.NumNodes).
                Select(o => new SOMNode() { Weights = weights[o] }).
                ToArray();

            // Split up this node (empty nodes are not returned)
            SOMResult subResult = TrainSOM(nodes, inputs, rules, false);

            return Tuple.Create(true, subResult.Nodes);
        }
Esempio n. 2
0
        /// <summary>
        /// Draws the result as solid colored voronoi blobs (one per node) and places the drawing inside the border.  The
        /// blobs are meant to have areas proportional to the number of items contained, and the optional events let the
        /// caller react to the mouse (for example showing sample items as tooltips)
        /// </summary>
        /// <param name="border">Receives the generated canvas as its Child.  Its actual size minus padding is the drawing size</param>
        /// <param name="result">Nodes (with 2D positions) and the inputs that belong to each node</param>
        /// <param name="getNodeColor">Called once per node to pick the blob's color</param>
        /// <param name="events">Optional callbacks, handed straight to the drawing helper</param>
        public static void ShowResults2D_Blobs(Border border, SOMResult result, Func<SOMNode, Color> getNodeColor, BlobEvents events = null)
        {
#if DEBUG
            // Debug builds only: refuse anything that isn't positioned in exactly two dimensions
            if (result.Nodes.Any(o => o.Position.Size != 2))
            {
                throw new ArgumentException("Node positions need to be 2D");
            }
#endif

            // Node positions are the voronoi control points
            Point[] centers = result.Nodes.
                Select(node => new Point(node.Position[0], node.Position[1])).
                ToArray();

            VoronoiResult2D cells = Math2D.CapVoronoiCircle(Math2D.GetVoronoi(centers, true));

            Color[] fills = result.Nodes.
                Select(node => getNodeColor(node)).
                ToArray();

            // Drawable area is whatever the border has left after its padding
            double availableWidth = border.ActualWidth - border.Padding.Left - border.Padding.Right;
            double availableHeight = border.ActualHeight - border.Padding.Top - border.Padding.Bottom;

            border.Child = DrawVoronoi_Blobs(cells, fills, result.Nodes, result.InputsByNode, availableWidth.ToInt_Floor(), availableHeight.ToInt_Floor(), events);
        }
Esempio n. 3
0
        /// <summary>
        /// Repositions the nodes so that the distances between their positions try to match the distances between their
        /// (high dimension) weights.  Nodes whose weights are practically on top of each other are merged first
        /// </summary>
        /// <param name="result">The trained result.  Nodes need to already have positions</param>
        /// <returns>A new result with the moved nodes (InputsByNode and IncludesEmptyNodes carried over from the merged result)</returns>
        public static SOMResult ArrangeNodes_LikesAttract(SOMResult result)
        {
            const int SpringIterations = 1500;

            // Distances between every pair of nodes, measured in weight space
            var targetDistances = MathND.GetDistancesBetween(result.Nodes.Select(o => o.Weights).ToArray());

            // Nodes sitting at the same high dimension location get collapsed into one
            bool wasMerged = MergeTouchingNodes(ref result, targetDistances);

            if (wasMerged)
            {
                // The node list changed, so the distances need to be rebuilt against the new list
                targetDistances = MathND.GetDistancesBetween(result.Nodes.Select(o => o.Weights).ToArray());
            }

            // Drag the low dimension positions around until they approximate the high dimension distances
            //NOTE: InputsByNode isn't touched by this (those are high dimension)
            SOMNode[] arranged = MoveNodes_BallOfSprings(result.Nodes, targetDistances, SpringIterations);

            return new SOMResult(arranged, result.InputsByNode, result.IncludesEmptyNodes);
        }
Esempio n. 4
0
        /// <summary>
        /// Clusters the inputs with K-Means (a simpler algorithm than SOM), then gives the resulting nodes positions
        /// </summary>
        /// <remarks>
        /// https://en.wikipedia.org/wiki/K-means_clustering
        ///
        /// SOM tends to produce better quality clusters, but it can fail and collapse into just 1 or 2 massive clusters.
        /// K-Means is a good way of guaranteeing a certain number of clusters
        ///
        /// The two algorithms are very different, but their inputs, outputs and goal look alike, which is why this
        /// lives in the same class
        /// </remarks>
        /// <param name="inputs">The items to cluster</param>
        /// <param name="numNodes">How many clusters to ask for</param>
        /// <param name="isDisplay2D">Not read yet (see the TODO below).  Positions are always injected as 2D</param>
        public static SOMResult TrainKMeans(ISOMInput[] inputs, int numNodes, bool isDisplay2D)
        {
            // The private overload does the actual clustering (note the swapped argument order)
            SOMResult clustered = TrainKMeans(numNodes, inputs);

            //TODO: Look at isDisplay2D
            InjectNodePositions2D(clustered.Nodes);

            // Rearrange the injected positions so similar nodes sit near each other
            return ArrangeNodes_LikesAttract(clustered);
        }
Esempio n. 5
0
        /// <summary>
        /// Core SOM training loop.  Works on clones of the nodes passed in, pulling the closest node (and its neighbors)
        /// toward each randomly chosen input, then reports which inputs ended up with which node
        /// </summary>
        /// <param name="nodes">Starting nodes (weights already populated).  These instances are cloned, not modified</param>
        /// <param name="inputs">The items to train against.  An empty array skips training entirely</param>
        /// <param name="rules">NumIterations, InitialRadiusPercent, LearningRate and AttractionFunction drive the loop</param>
        /// <param name="returnEmptyNodes">False strips out nodes that didn't end up with any inputs</param>
        private static SOMResult TrainSOM(SOMNode[] nodes, ISOMInput[] inputs, SOMRules rules, bool returnEmptyNodes = false)
        {
            double mapRadius = MathND.GetRadius(MathND.GetAABB(nodes.Select(o => o.Weights)));

            SOMNode[] returnNodes = nodes.
                Select(o => o.Clone()).
                ToArray();

            // NOTE(review): Math.Log is zero or negative for a radius of 1 or less, which makes this infinite or negative
            // and stops the search radius below from shrinking - confirm whether small maps need special handling
            double timeConstant = rules.NumIterations / Math.Log(mapRadius);

            int iteration = 0;
            int remainingIterations = rules.NumIterations;

            // Each pass consumes inputs.Length iterations.  With no inputs that is zero, so the loop would never
            // finish - hence the extra length check
            while (remainingIterations > 0 && inputs.Length > 0)
            {
                foreach (ISOMInput input in UtilityCore.RandomOrder(inputs, Math.Min(remainingIterations, inputs.Length)))
                {
                    // Find closest node
                    SOMNode closest = GetClosest(returnNodes, input).Item1;

                    // Find other affected nodes (a node and distance squared)
                    double searchRadius = mapRadius * rules.InitialRadiusPercent * Math.Exp(-iteration / timeConstant);
                    Tuple<SOMNode, double>[] neighbors = GetNeighbors(returnNodes, closest, searchRadius);

                    // Learning rate decays exponentially over the run
                    double learningRate = rules.LearningRate * Math.Exp(-(double)iteration / (double)rules.NumIterations);

                    // Adjust the matched node (full learning rate)
                    AdjustNodeWeights(closest, input.Weights, learningRate);

                    foreach (var neighbor in neighbors)
                    {
                        double influence = GetInfluence(rules.AttractionFunction, neighbor.Item2, searchRadius);

                        // Adjust a neighbor (learning rate scaled by how much influence the match has on it)
                        AdjustNodeWeights(neighbor.Item1, input.Weights, learningRate * influence);
                    }

                    iteration++;
                }

                remainingIterations -= inputs.Length;
            }

            // See which inputs go with which nodes
            ISOMInput[][] inputsByNode = GetInputsByNode(returnNodes, inputs);

            SOMResult retVal = new SOMResult(returnNodes, inputsByNode, true);

            if (!returnEmptyNodes)
            {
                retVal = RemoveZeroNodes(retVal);
            }

            return retVal;
        }
Esempio n. 6
0
        /// <summary>
        /// This version starts with a SOM, then potentially splits the largest node and/or gathers the smallest nodes into a single
        /// </summary>
        /// <param name="inputs">The items to cluster</param>
        /// <param name="rules">Rules for the initial SOM run</param>
        /// <param name="isDisplay2D">Passed along to the training methods that assign node positions</param>
        /// <returns>
        /// The SOM result untouched when it has no nodes (or a single node and fewer than 20 inputs), a K-Means result
        /// when the SOM collapsed into a single node, otherwise a result rebuilt from the split / kept / merged nodes
        /// </returns>
        public static SOMResult Train(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D)
        {
            // Below this many inputs a single node is accepted as is
            const int MinInputsForKMeansFallback = 20;
            // Number of clusters requested when falling back to K-Means
            const int KMeansFallbackNodes = 5;

            SOMResult result = SelfOrganizingMaps.TrainSOM(inputs, rules, isDisplay2D);

            if (result.Nodes.Length == 0)
            {
                return result;
            }
            else if (result.Nodes.Length == 1)
            {
                #region kmeans single node

                if (inputs.Length < MinInputsForKMeansFallback)
                {
                    return result;
                }

                // SOM lumped everything into one node, so let K-Means force some clusters.  The caller's display
                // preference is forwarded instead of a hard coded value
                return SelfOrganizingMaps.TrainKMeans(inputs, KMeansFallbackNodes, isDisplay2D);

                #endregion
            }

            var categorized = GetSOM_SplitMerge_Categorize(result);

            List<SOMNode> nodes = new List<SOMNode>();
            List<ISOMInput[]> newInputs = new List<ISOMInput[]>();

            // The k-means pieces and the kept nodes carry over unchanged
            foreach (NodeCombo set in UtilityCore.Iterate(categorized.kmeans, categorized.keep))        // UtilityCore.Iterate gracefully skips nulls
            {
                nodes.Add(set.Node);
                newInputs.Add(set.Inputs);
            }

            if (categorized.remaining != null)
            {
                // Everything left over becomes one node sitting at the center of the nodes it replaces
                nodes.Add(new SOMNode()
                {
                    Position = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Position)),
                    Weights = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Weights)),
                });

                newInputs.Add(categorized.remaining.
                    SelectMany(o => o.Inputs).
                    ToArray());
            }

            return new SOMResult(nodes.ToArray(), newInputs.ToArray(), false);
        }
Esempio n. 7
0
        /// <summary>
        /// Trains once, then keeps re-splitting any node whose inputs cover too wide of a range of values
        /// </summary>
        /// <remarks>
        /// This method is a bit of a failure.  Sometimes it works, but other times it just runs without fixing anything
        ///
        /// The split passes are capped (50), after which whatever has been built so far is used
        /// </remarks>
        /// <param name="inputs">The items to cluster</param>
        /// <param name="rules">Rules for the initial run and for each split</param>
        /// <param name="maxSpreadPercent">
        /// Spread is an input's distance from the center of all inputs.  The percent is a node's max distance divided by all node's max distance.
        /// .65 to .75 is a good value to use (smaller values will chop up into more nodes)
        /// </param>
        /// <param name="isDisplay2D">Forwarded to the initial training call.  The final positions are always injected as 2D (see TODO)</param>
        /// <param name="returnEmptyNodes">Forwarded to the initial training call.  Any split pass removes empty nodes regardless</param>
        public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, double maxSpreadPercent, bool isDisplay2D, bool returnEmptyNodes = false)
        {
            const int MinInputsForSplit = 4;
            const int MaxSplitPasses = 50;

            // Initial training
            SOMResult result = TrainSOM(inputs, rules, isDisplay2D, returnEmptyNodes);

            // Spread across every input is what each node's spread gets compared against
            double totalSpread = GetTotalSpread(inputs.Select(o => o.Weights));

            // Bounded number of passes.  If the cap is reached, just go with whatever is there
            for (int pass = 0; pass < MaxSplitPasses; pass++)
            {
                // Try to split every node (in parallel).  Nodes that don't need it come back unchanged
                var attempts = Enumerable.Range(0, result.Nodes.Length).
                    AsParallel().
                    Select(o => SplitNode(o, result, MinInputsForSplit, maxSpreadPercent, totalSpread, rules)).
                    ToArray();

                if (!attempts.Any(o => o.Item1))
                {
                    // Nothing was split this pass, so it's finished
                    break;
                }

                SOMNode[] flattened = attempts.
                    SelectMany(o => o.Item2).
                    ToArray();

                // Reassign all the inputs against the new set of nodes, dropping any node that came up empty
                ISOMInput[][] inputsByNode = SelfOrganizingMaps.GetInputsByNode(flattened, inputs);
                result = SelfOrganizingMaps.RemoveZeroNodes(new SOMResult(flattened, inputsByNode, false));
            }

            //TODO: Look at isDisplay2D
            InjectNodePositions2D(result.Nodes);

            return ArrangeNodes_LikesAttract(result);
        }
Esempio n. 8
0
        /// <summary>
        /// Builds a copy of the result that leaves out every node with no inputs assigned to it
        /// </summary>
        /// <param name="result">Nodes and InputsByNode are expected to line up index for index</param>
        /// <returns>A new result flagged as not including empty nodes</returns>
        private static SOMResult RemoveZeroNodes(SOMResult result)
        {
            // Indices of the nodes that own at least one input (original order is kept)
            int[] populated = Enumerable.Range(0, result.Nodes.Length).
                Where(i => result.InputsByNode[i].Length != 0).
                ToArray();

            SOMNode[] keptNodes = populated.
                Select(i => result.Nodes[i]).
                ToArray();

            ISOMInput[][] keptInputs = populated.
                Select(i => result.InputsByNode[i]).
                ToArray();

            return new SOMResult(keptNodes, keptInputs, false);
        }
Esempio n. 9
0
        /// <summary>
        /// Seeds nodes with random weights derived from the inputs, trains them, then injects node positions and arranges
        /// those positions so that similar sets end up near each other
        /// </summary>
        /// <param name="inputs">These are items turned into vectors.  They could be images, db row hashes, whatever</param>
        /// <param name="rules">NumNodes decides how many nodes get seeded.  The rest is used by the training loop</param>
        /// <param name="isDisplay2D">
        /// This doesn't affect the actual algorithm, just node.Position (true is 2D, false is 3D).  It isn't read yet (see TODO)
        /// </param>
        /// <param name="returnEmptyNodes">This shouldn't even be an option.  Empty nodes are just artifacts that pollute the final result</param>
        public static SOMResult TrainSOM(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D, bool returnEmptyNodes = false)
        {
            // One node per random weight vector
            SOMNode[] seeds = GetRandomNodeWeights(rules.NumNodes, inputs).
                Select(weights => new SOMNode() { Weights = weights }).
                ToArray();

            SOMResult trained = TrainSOM(seeds, inputs, rules, returnEmptyNodes);

            //TODO: Look at isDisplay2D
            InjectNodePositions2D(trained.Nodes);

            return ArrangeNodes_LikesAttract(trained);
        }
Esempio n. 10
0
        /// <summary>
        /// Carves the border's area into a voronoi diagram (one cell per node), has each cell tiled with examples, and
        /// places the drawing inside the border
        /// </summary>
        /// <param name="border">Receives the generated canvas as its Child.  Its actual size minus padding is the drawing size</param>
        /// <param name="result">Nodes (the first two position coordinates are used) and the inputs that belong to each node</param>
        /// <param name="tileWidth">Forwarded to the drawing helper</param>
        /// <param name="tileHeight">Forwarded to the drawing helper</param>
        /// <param name="drawTile">Forwarded to the drawing helper, which uses it to render tiles</param>
        /// <param name="events">Optional callbacks, handed straight to the drawing helper</param>
        public static void ShowResults2D_Tiled(Border border, SOMResult result, int tileWidth, int tileHeight, Action<DrawTileArgs> drawTile, BlobEvents events = null)
        {
            //TODO: Take a func that will render the input onto a writable bitmap, or something dynamic but efficient?

            // Node positions are the voronoi control points
            Point[] centers = result.Nodes.
                Select(node => new Point(node.Position[0], node.Position[1])).
                ToArray();

            // Drawable area is whatever the border has left after its padding
            double availableWidth = border.ActualWidth - border.Padding.Left - border.Padding.Right;
            double availableHeight = border.ActualHeight - border.Padding.Top - border.Padding.Bottom;

            //TODO: Implement Math2D.CapVoronoiRectangle(voronoi, aspectRatio: 1d) and use that instead of the circle
            VoronoiResult2D cells = Math2D.CapVoronoiCircle(Math2D.GetVoronoi(centers, true));

            border.Child = DrawVoronoi_Tiled(cells, result.Nodes, result.InputsByNode, availableWidth.ToInt_Floor(), availableHeight.ToInt_Floor(), tileWidth, tileHeight, drawTile, events);
        }
Esempio n. 11
0
        /// <summary>
        /// The K-Means loop: adjust the centers, reassign the inputs, and repeat until the assignment stops changing
        /// </summary>
        /// <param name="numNodes">How many clusters to start with</param>
        /// <param name="inputs">The items to cluster</param>
        /// <returns>The final nodes and their inputs, with empty nodes removed</returns>
        private static SOMResult TrainKMeans(int numNodes, ISOMInput[] inputs)
        {
            SOMNode[] centers;
            ISOMInput[][] assignments;
            GetInitialKMeansNodes(out centers, out assignments, numNodes, inputs);

            bool isStable;
            do
            {
                // Move the centers based on the current assignment, then see where the inputs land now
                AdjustKMeansCenters(centers, assignments);

                ISOMInput[][] reassigned = GetInputsByNode(centers, inputs);

                isStable = IsSame(assignments, reassigned);

                if (!isStable)
                {
                    assignments = reassigned;
                }
            } while (!isStable);

            //NOTE: The only time empty nodes should occur is if there are duplicate inputs
            return RemoveZeroNodes(new SOMResult(centers, assignments, true));
        }
Esempio n. 12
0
        /// <summary>
        /// Sorts the nodes by how many inputs they hold and sorts them into three buckets: the pieces of a k-means split
        /// of an oversized first node, the nodes to keep as they are, and the small nodes that should be merged into one
        /// </summary>
        /// <remarks>
        /// "Oversized" and "small" are both judged by a ratio of input counts greater than 10.  The result needs at
        /// least two nodes (the first two are compared with each other)
        /// </remarks>
        /// <returns>
        /// kmeans: the split of the largest node, or null if it wasn't split.  keep: nodes to carry over unchanged.
        /// remaining: nodes to merge, or null if there aren't any
        /// </returns>
        private static (NodeCombo[] kmeans, NodeCombo[] keep, NodeCombo[] remaining) GetSOM_SplitMerge_Categorize(SOMResult result)
        {
            const double DominanceRatio = 10;

            // Pair every node with its inputs, most populated first
            NodeCombo[] bySize = result.Nodes.
                Select((node, i) => new NodeCombo() { Node = node, Inputs = result.InputsByNode[i] }).
                OrderByDescending(o => o.Inputs.Length).
                ToArray();

            // The largest node is split with k-means when it dwarfs the runner up
            bool splitLargest = bySize[0].Inputs.Length.ToDouble() / bySize[1].Inputs.Length.ToDouble() > DominanceRatio;
            int keepStart = splitLargest ? 1 : 0;

            NodeCombo[] kmeansSplit = null;
            if (splitLargest)
            {
                SOMResult split = SelfOrganizingMaps.TrainKMeans(bySize[0].Inputs, 4, true);

                kmeansSplit = split.Nodes.
                    Select((node, i) => new NodeCombo() { Node = node, Inputs = split.InputsByNode[i] }).
                    ToArray();
            }

            // Starting with the first kept node, advance until a node is dwarfed by that first kept node.  The
            // comparison is written as a negated "greater than" so a NaN ratio keeps the node, same as a small ratio
            double firstKeptCount = bySize[keepStart].Inputs.Length.ToDouble();
            int keepStop = keepStart + 1;

            while (keepStop < bySize.Length && !(firstKeptCount / bySize[keepStop].Inputs.Length.ToDouble() > DominanceRatio))
            {
                keepStop++;
            }

            List<NodeCombo> keep = bySize.
                Skip(keepStart).
                Take(keepStop - keepStart).
                ToList();

            // Whatever comes after the kept range gets merged into a single node by the caller
            NodeCombo[] remaining = keepStop < bySize.Length ?
                bySize.Skip(keepStop).ToArray() :
                null;

            if (remaining == null && keep.Count > 0 && splitLargest)
            {
                // Nothing was flagged for merging, but if even the smallest k-means piece dwarfs all the kept nodes
                // combined, then the kept nodes are the ones that should be merged
                int keptInputCount = keep.Sum(o => o.Inputs.Length);
                int smallestSplitCount = kmeansSplit.Min(o => o.Inputs.Length);

                if (smallestSplitCount.ToDouble() / keptInputCount.ToDouble() > DominanceRatio)
                {
                    remaining = keep.ToArray();
                    keep.Clear();
                }
            }

            return (kmeansSplit, keep.ToArray(), remaining);
        }
Esempio n. 13
0
        /// <summary>
        /// If two nodes are too close to each other, they get merged into one
        /// </summary>
        /// <remarks>
        /// Merging is transitive: if A touches B and B touches C, all three end up as a single node.  Each original
        /// node (and therefore each input) lands in exactly one output node
        /// </remarks>
        /// <param name="result">Replaced with the merged result when anything was merged, otherwise left alone</param>
        /// <param name="distances">Pairs of node indices (Item1, Item2) and the distance between them (Item3)</param>
        /// <param name="minDist">Pairs closer than this are considered touching</param>
        /// <returns>True if at least one pair was touching (result was replaced)</returns>
        private static bool MergeTouchingNodes(ref SOMResult result, Tuple<int, int, double>[] distances, double minDist = .01)
        {
            // Find touching
            var touching = distances.
                Where(o => o.Item3 < minDist).
                ToArray();

            if (touching.Length == 0)
            {
                return false;
            }

            // ref parameters can't be used inside lambdas, so work from a local copy of the reference
            SOMResult source = result;

            #region Merge key pairs

            // There could be several pairs that need to be joined.  ex:
            //      {0,2} {0,3} {2,5}       ->      {0,2,3,5}
            //      {1,6}       ->      {1,6}
            //      {0,1} {2,3} {1,2}       ->      {0,1,2,3}       (the last pair bridges two existing sets)

            List<List<int>> sets = new List<List<int>>();

            foreach (var pair in touching)
            {
                // An index is only ever in one set, so each lookup has at most one match
                List<int> setOf1 = sets.FirstOrDefault(o => o.Contains(pair.Item1));
                List<int> setOf2 = sets.FirstOrDefault(o => o.Contains(pair.Item2));

                if (setOf1 == null && setOf2 == null)
                {
                    List<int> created = new List<int>();
                    created.Add(pair.Item1);
                    if (pair.Item2 != pair.Item1)
                    {
                        created.Add(pair.Item2);
                    }
                    sets.Add(created);
                }
                else if (setOf1 == null)
                {
                    setOf2.Add(pair.Item1);
                }
                else if (setOf2 == null)
                {
                    setOf1.Add(pair.Item2);
                }
                else if (!ReferenceEquals(setOf1, setOf2))
                {
                    // The pair connects two sets that were built independently.  Fold the later set into the earlier
                    // one, otherwise the shared node's inputs would get copied into two different merged nodes
                    bool firstIsEarlier = sets.IndexOf(setOf1) < sets.IndexOf(setOf2);
                    List<int> target = firstIsEarlier ? setOf1 : setOf2;
                    List<int> absorbed = firstIsEarlier ? setOf2 : setOf1;

                    target.AddRange(absorbed);
                    sets.Remove(absorbed);
                }
            }

            #endregion
            #region Singular sets

            // Identify stand alone nodes, and add their index to the sets list (makes the next section easier to implement)

            HashSet<int> grouped = new HashSet<int>(sets.SelectMany(o => o));

            for (int cntr = 0; cntr < source.Nodes.Length; cntr++)
            {
                if (!grouped.Contains(cntr))
                {
                    List<int> singleSet = new List<int>();
                    singleSet.Add(cntr);
                    sets.Add(singleSet);
                }
            }

            #endregion
            #region Merge nodes

            List<SOMNode> newNodes = new List<SOMNode>();
            List<ISOMInput[]> newInputsByNode = new List<ISOMInput[]>();

            foreach (List<int> set in sets)
            {
                // Just use the first node (no need to take the average of weights since they're nearly identical, and taking the average position
                // doesn't add any value - later methods will move the node positions around anyway)
                newNodes.Add(source.Nodes[set[0]]);

                if (set.Count == 1)
                {
                    newInputsByNode.Add(source.InputsByNode[set[0]]);
                }
                else
                {
                    // Concatenate the inputs of every node in the set
                    newInputsByNode.Add(set.
                        SelectMany(o => source.InputsByNode[o]).
                        ToArray());
                }
            }

            #endregion

            result = new SOMResult(newNodes.ToArray(), newInputsByNode.ToArray(), source.IncludesEmptyNodes);
            return true;
        }