Ejemplo n.º 1
0
        /// <summary>
        /// Turns a number into a fixed-width vector of normalized digits (values from -1 to 1 in each dimension)
        /// </summary>
        /// <remarks>
        /// Useful when numeric data has to be expressed as a vector.
        ///
        /// Example: running a SOM against a database.  Every column is mapped to its own vector, and the vectors of a
        /// row are stitched together to form one instance of ISOMInput.Weights.  Numeric columns (dollars, quantities)
        /// would go through this method.
        ///
        /// The caller is expected to prequery the range of possible values and run the maximum expected value through
        /// GetConvertBaseProps() to decide which base to use.  This method then scales the value to a long, converts
        /// it to that base, and divides each digit by the base (sort of like percent of base).
        ///
        /// If the number needs more digits than props.Width, every element is saturated to 1 (or -1 when value is
        /// negative).  If it needs fewer, the result is left-padded with zeros.
        /// </remarks>
        private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
        {
            // Scale to an integer, then express that integer as digits of the configured base
            long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
            int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

            if (digits.Length > props.Width)
            {
                // Doesn't fit in the available slots: saturate, keeping the sign of the input
                double saturated = value < 0 ? -1d : 1d;
                return Enumerable.Repeat(saturated, props.Width).ToArray();
            }

            double baseDbl = props.Number_BaseConvertTo.Value.ToDouble();

            // The array starts out as all zeros, so writing the digits right-aligned gives the left padding for free
            double[] retVal = new double[props.Width];
            int offset = retVal.Length - digits.Length;

            for (int index = 0; index < digits.Length; index++)
            {
                retVal[offset + index] = digits[index].ToDouble() / baseDbl;
            }

            return retVal;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Rearranges the low dimension node positions so that the distances between them try to match the
        /// distances between the nodes' high dimension weights
        /// </summary>
        /// <param name="result">The trained map.  MergeTouchingNodes may replace it with a merged version before positions are adjusted</param>
        /// <returns>A new result with repositioned nodes; InputsByNode and IncludesEmptyNodes are carried over from the (possibly merged) result</returns>
        public static SOMResult ArrangeNodes_LikesAttract(SOMResult result)
        {
            // High dimension distances act as the target lengths between nodes
            VectorND[] highDimPoints = result.Nodes.Select(node => node.Weights).ToArray();
            var targetDistances = MathND.GetDistancesBetween(highDimPoints);

            // Nodes sharing the same high dimension position get merged.  That changes the node list, so the
            // distances need to be rebuilt from the merged set
            bool wasMerged = MergeTouchingNodes(ref result, targetDistances);
            if (wasMerged)
            {
                highDimPoints = result.Nodes.Select(node => node.Weights).ToArray();
                targetDistances = MathND.GetDistancesBetween(highDimPoints);
            }

            //NOTE: Only the low dimension positions move.  InputsByNode is high dimension, so it's unaffected
            SOMNode[] arranged = MoveNodes_BallOfSprings(result.Nodes, targetDistances, 1500);

            return new SOMResult(arranged, result.InputsByNode, result.IncludesEmptyNodes);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds random weight vectors that fall within a slightly enlarged bounding box of the inputs' weights
        /// </summary>
        /// <param name="count">How many vectors to create</param>
        /// <param name="inputs">The inputs whose weights define the bounding box</param>
        private static VectorND[] GetRandomNodeWeights(int count, ISOMInput[] inputs)
        {
            var inputBox = MathND.GetAABB(inputs.Select(input => input.Weights));

            // Resize by 1.1 so the returned vectors are allowed to be slightly outside the input box
            inputBox = MathND.ResizeAABB(inputBox, 1.1);

            var randomWeights =
                from index in Enumerable.Range(0, count)
                select MathND.GetRandomVector(inputBox.Item1, inputBox.Item2);

            return randomWeights.ToArray();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains clones of the given nodes against the inputs, then reports which inputs ended up with which node
        /// </summary>
        /// <remarks>
        /// Inputs are presented in random order.  For each one, the closest node is pulled toward the input at the
        /// current learning rate, and the nodes returned by GetNeighbors are pulled by the learning rate scaled by
        /// their influence.  Both the search radius and the learning rate shrink exponentially as the iteration
        /// counter grows.
        ///
        /// The nodes passed in are not modified (training runs against clones).
        /// </remarks>
        /// <param name="nodes">Starting nodes</param>
        /// <param name="inputs">Training inputs.  If empty, no training passes are run</param>
        /// <param name="rules">Iteration count, learning rate, initial radius percent and attraction function</param>
        /// <param name="returnEmptyNodes">False runs the result through RemoveZeroNodes before returning</param>
        private static SOMResult TrainSOM(SOMNode[] nodes, ISOMInput[] inputs, SOMRules rules, bool returnEmptyNodes = false)
        {
            double mapRadius = MathND.GetRadius(MathND.GetAABB(nodes.Select(o => o.Weights)));

            SOMNode[] returnNodes = nodes.
                                    Select(o => o.Clone()).
                                    ToArray();

            double timeConstant = rules.NumIterations / Math.Log(mapRadius);

            int iteration = 0;
            int remainingIterations = rules.NumIterations;

            // Each pass consumes inputs.Length iterations.  With zero inputs the counter would never go down and this
            // loop would spin forever, so an empty input set skips training entirely
            while (remainingIterations > 0 && inputs.Length > 0)
            {
                foreach (ISOMInput input in UtilityCore.RandomOrder(inputs, Math.Min(remainingIterations, inputs.Length)))
                {
                    // Find closest node
                    SOMNode closest = GetClosest(returnNodes, input).Item1;

                    // Find other affected nodes (a node and distance squared)
                    double searchRadius = mapRadius * rules.InitialRadiusPercent * Math.Exp(-iteration / timeConstant);
                    Tuple<SOMNode, double>[] neighbors = GetNeighbors(returnNodes, closest, searchRadius);

                    double learningRate = rules.LearningRate * Math.Exp(-(double)iteration / (double)rules.NumIterations);

                    // Adjust the matched node (full learning rate)
                    AdjustNodeWeights(closest, input.Weights, learningRate);

                    foreach (var neighbor in neighbors)
                    {
                        double influence = GetInfluence(rules.AttractionFunction, neighbor.Item2, searchRadius);

                        // Adjust a neighbor (learning rate reduced by its influence)
                        AdjustNodeWeights(neighbor.Item1, input.Weights, learningRate * influence);
                    }

                    iteration++;
                }

                remainingIterations -= inputs.Length;
            }

            // See which inputs go with which nodes
            ISOMInput[][] inputsByNode = GetInputsByNode(returnNodes, inputs);

            SOMResult retVal = new SOMResult(returnNodes, inputsByNode, true);

            if (!returnEmptyNodes)
            {
                retVal = RemoveZeroNodes(retVal);
            }

            return retVal;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// This is similar logic as standard deviation, but this returns the max distance from the average
        /// NOTE: Spread is probably the wrong word, since this only returns the max distance (radius instead of diameter)
        /// </summary>
        /// <param name="values">The points to measure.  Enumerated only once by this method</param>
        /// <returns>The distance from the center (MathND.GetCenter) to the point that is farthest from it</returns>
        public static double GetTotalSpread(IEnumerable<VectorND> values)
        {
            // The points are needed twice (center, then distances).  Materialize anything that isn't already a
            // collection so a lazy or one-shot sequence isn't evaluated twice
            IEnumerable<VectorND> points = values as ICollection<VectorND> ?? values.ToList();

            VectorND mean = MathND.GetCenter(points);

            // Only the largest value is needed, so take the max directly instead of sorting everything
            double maxDistanceSquared = points.
                                        Select(o => (o - mean).LengthSquared).
                                        Max();

            return Math.Sqrt(maxDistanceSquared);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Trains a SOM, then post-processes it: the categorized "kmeans" and "keep" node sets are carried over as
        /// they are, and any "remaining" nodes are gathered into a single node
        /// </summary>
        /// <remarks>
        /// Special cases:
        ///     No nodes: the SOM result is returned as is
        ///     One node: returned as is when there are fewer than 20 inputs, otherwise the inputs are handed to
        ///     SelfOrganizingMaps.TrainKMeans instead
        /// </remarks>
        /// <returns>The post-processed result (constructed with the include-empty-nodes flag set to false)</returns>
        public static SOMResult Train(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D)
        {
            SOMResult som = SelfOrganizingMaps.TrainSOM(inputs, rules, isDisplay2D);

            int nodeCount = som.Nodes.Length;

            if (nodeCount == 0)
            {
                return som;
            }

            if (nodeCount == 1)
            {
                // A single node with enough inputs gets a k-means pass; small input sets stay as the one node
                if (inputs.Length >= 20)
                {
                    return SelfOrganizingMaps.TrainKMeans(inputs, 5, true);
                }

                return som;
            }

            var categorized = GetSOM_SplitMerge_Categorize(som);

            List<SOMNode> finalNodes = new List<SOMNode>();
            List<ISOMInput[]> finalInputs = new List<ISOMInput[]>();

            // UtilityCore.Iterate gracefully skips nulls
            foreach (NodeCombo combo in UtilityCore.Iterate(categorized.kmeans, categorized.keep))
            {
                finalNodes.Add(combo.Node);
                finalInputs.Add(combo.Inputs);
            }

            var leftovers = categorized.remaining;
            if (leftovers != null)
            {
                // Collapse the leftovers into one node sitting at the center of their positions and weights
                SOMNode merged = new SOMNode()
                {
                    Position = MathND.GetCenter(leftovers.Select(o => o.Node.Position)),
                    Weights = MathND.GetCenter(leftovers.Select(o => o.Node.Weights)),
                };

                finalNodes.Add(merged);
                finalInputs.Add(leftovers.SelectMany(o => o.Inputs).ToArray());
            }

            return new SOMResult(finalNodes.ToArray(), finalInputs.ToArray(), false);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Moves the low dimension positions toward a layout that reflects the high dimension relationships (the
        /// links between nodes act like springs)
        /// </summary>
        /// <param name="nodes">The nodes to reposition.  These instances aren't modified</param>
        /// <param name="desiredDistances">Passed straight through to MathND.ApplyBallOfSprings</param>
        /// <param name="numIterations">Passed straight through to MathND.ApplyBallOfSprings</param>
        /// <returns>New nodes that keep the original weights, but use the adjusted positions</returns>
        private static SOMNode[] MoveNodes_BallOfSprings(SOMNode[] nodes, Tuple<int, int, double>[] desiredDistances, int numIterations)
        {
            VectorND[] startPositions = nodes.Select(node => node.Position).ToArray();

            VectorND[] movedPositions = MathND.ApplyBallOfSprings(startPositions, desiredDistances, numIterations);

            // Rebuild the nodes, pairing each original weight with its new position
            SOMNode[] rebuilt = new SOMNode[nodes.Length];

            for (int index = 0; index < rebuilt.Length; index++)
            {
                rebuilt[index] = new SOMNode()
                {
                    Weights = nodes[index].Weights,
                    Position = movedPositions[index],
                };
            }

            return rebuilt;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Generates random points inside the bounding box of the inputs, keeping only points that are closer to
        /// the node at nodeIndex than to any other node
        /// </summary>
        /// <remarks>
        /// The desired node can sit outside the inputs' bounding box.  That isn't treated as an error up front.
        /// Instead, each point gets a limited number of tries.  When the tries for a point run out, the whole request
        /// is handed to GetRandomWeights_InsideCell_LARGERBUTFAIL, which samples from a larger box (points already
        /// accepted here are discarded).
        ///
        /// When testing that other method, there was almost never an improved node.  The caller just keeps trying for
        /// an improvement that never happens.
        /// </remarks>
        private static VectorND[] GetRandomWeights_InsideCell(int count, ISOMInput[] inputs, SOMNode[] nodes, int nodeIndex)
        {
            const int maxFailedAttempts = 1000;

            var samplingBox = MathND.GetAABB(inputs.Select(input => input.Weights));

            List<VectorND> accepted = new List<VectorND>();

            // Each pass through this loop either accepts exactly one point, or bails out to the fallback
            while (accepted.Count < count)
            {
                bool foundOne = false;

                for (int failures = 0; failures <= maxFailedAttempts; failures++)
                {
                    VectorND candidate = MathND.GetRandomVector(samplingBox.Item1, samplingBox.Item2);

                    // Index of the node nearest to the candidate
                    int nearestIndex = nodes.
                        Select((node, index) => new { Index = index, DistSquared = (candidate - node.Weights).LengthSquared }).
                        OrderBy(o => o.DistSquared).
                        First().
                        Index;

                    if (nearestIndex == nodeIndex)
                    {
                        accepted.Add(candidate);
                        foundOne = true;
                        break;
                    }
                }

                if (!foundOne)
                {
                    // Instead of giving up, increase the range that the weights can exist in
                    return GetRandomWeights_InsideCell_LARGERBUTFAIL(count, inputs, nodes, nodeIndex);
                }
            }

            return accepted.ToArray();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Moves each node's weights to the center of the inputs assigned to it
        /// </summary>
        /// <param name="nodes">Nodes to update in place</param>
        /// <param name="inputsByNode">The inputs belonging to each node (same order and length as nodes)</param>
        /// <exception cref="ArgumentException">The two arrays have different lengths</exception>
        private static void AdjustKMeansCenters(SOMNode[] nodes, ISOMInput[][] inputsByNode)
        {
            if (nodes.Length != inputsByNode.Length)
            {
                throw new ArgumentException("Arrays must be the same size");
            }

            for (int index = 0; index < nodes.Length; index++)
            {
                ISOMInput[] members = inputsByNode[index];

                // A node without inputs keeps its current weights.  This was seen when there were a bunch of
                // identical images; otherwise, it should never happen
                if (members.Length > 0)
                {
                    nodes[index].Weights = MathND.GetCenter(members.Select(o => o.Weights));
                }
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns the component of this vector that runs along another vector
        /// NOTE: The result points along alongVector's line, and its length is somewhere from zero up to this vector's full length
        /// </summary>
        /// <remarks>
        /// This is copied from the Vector3D version
        ///
        /// Formula: c = (a dot unit(b)) * unit(b)
        /// </remarks>
        /// <param name="vector">The vector being projected</param>
        /// <param name="alongVector">The direction to project onto</param>
        /// <param name="eitherDirection">
        /// True: a projection that points opposite to alongVector is returned as is
        /// False: a projection that points opposite to alongVector is replaced with a zero vector
        /// </param>
        public static VectorND GetProjectedVector(this VectorND vector, VectorND alongVector, bool eitherDirection = true)
        {
            // Nothing to project (or nothing to project onto)
            bool isDegenerate = vector.IsNearZero() || alongVector.IsNearZero();
            if (isDegenerate)
            {
                return MathND.GetZeroVector(vector, alongVector);
            }

            VectorND direction = alongVector.ToUnit();

            // Signed length of the projection (negative when it points away from alongVector)
            double signedLength = VectorND.DotProduct(vector, direction);

            if (signedLength < 0 && !eitherDirection)
            {
                // It's in the opposite direction, and that isn't allowed
                return MathND.GetZeroVector(vector, alongVector);
            }

            return direction * signedLength;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Converts a number into a fixed-width vector where the digits to the right of the leading digit are
        /// washed toward one
        /// </summary>
        /// <remarks>
        /// The leftmost digit is the most significant, and is returned accurately (digit / base).  The digits to the
        /// right matter less, but a self organizing map groups things purely by the pattern of the values.  So the
        /// digits to the right are pushed toward one (think of them as overridden by the digits to their left).
        ///
        /// Linear wasn't wanted, something faster was.  So the digits to the right follow a root-like curve (x axis
        /// scaled between 0 and 1 over the remaining digits).
        ///
        /// Example:
        ///     Without the trend toward one, these two would map close to each other (even though the first
        ///     represents 1, and the second could represent 201)
        ///         0 0 0 0 1
        ///         0 .1 0 0 1
        ///     This method turns them into something like:
        ///         0 0 0 0 1
        ///         0 .1 .6 .95 1       --- digits to the right follow a curve toward 1
        ///
        /// The curve is actually a band between x^.1 and x^.04.  The digit's value picks a spot within that band
        /// (treated as a percent from the lower curve to the upper curve).
        ///
        /// A value that converts to no digits returns all zeros.  A value with more digits than props.Width returns
        /// all 1s (or -1s if value is negative).  Shorter results are left-padded with zeros.
        /// </remarks>
        private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
        {
            const double PowLower = .1;
            const double PowUpper = .04;

            // Convert to a different base
            long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
            int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

            if (digits.Length == 0)
            {
                // Zero, return 0s
                return Enumerable.Repeat(0d, props.Width).ToArray();
            }

            if (digits.Length > props.Width)
            {
                // Too big, return 1s (keeping the sign)
                double saturated = value < 0 ? -1d : 1d;
                return Enumerable.Repeat(saturated, props.Width).ToArray();
            }

            double baseDbl = props.Number_BaseConvertTo.Value.ToDouble();

            double[] shaped = new double[digits.Length];

            // Leading digit: plain fraction of the base
            shaped[0] = digits[0].ToDouble() / baseDbl;

            double leadMagnitude = Math.Abs(shaped[0]);
            bool isNegative = shaped[0] < 0;

            // Everything to the right lives in the gap between the leading digit's magnitude and one
            double headroom = 1d - leadMagnitude;

            for (int index = 1, last = shaped.Length - 1; index <= last; index++)
            {
                // X runs from just above 0 up to 1 across the trailing digits
                double x = index * (1d / last);

                // Y will be between these two curves
                double lowerCurve = Math.Pow(x, PowLower);
                double upperCurve = Math.Pow(x, PowUpper);

                // Treat this digit like a percent between the two curves
                double y = UtilityCore.GetScaledValue(lowerCurve, upperCurve, 0, props.Number_BaseConvertTo.Value, Math.Abs(digits[index]));

                // Squeeze into the headroom above the leading digit
                y = (y * headroom) + leadMagnitude;

                shaped[index] = isNegative ? -y : y;
            }

            // Return, make sure the array is the right size (zeros on the left)
            double[] retVal = new double[props.Width];
            Array.Copy(shaped, 0, retVal, retVal.Length - shaped.Length, shaped.Length);

            return retVal;
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Compares two vectors by handing their underlying arrays to MathND.IsNearValue
 /// </summary>
 public static bool IsNearValue(this VectorND vector, VectorND other)
 {
     var left = vector.VectorArray;
     var right = other.VectorArray;

     return MathND.IsNearValue(left, right);
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Only call this from the other overload.  This uses a much larger bounding box (it still makes sure all returned points
        /// are closer to the desired node than other nodes)
        /// </summary>
        /// <remarks>
        /// The sampling box is the union of the inputs' bounding box and a cube centered on the desired node.  The
        /// cube's half-width is the distance to the farthest other node.  When there are no other nodes, it's the
        /// distance to the farther of the two corners of the inputs' bounding box.
        ///
        /// This works, but the caller generally never finds a solution that's better than what it already has.  So if you need
        /// to use this overload, you're just going to spin the processor
        /// </remarks>
        /// <exception cref="ApplicationException">More than 1000 consecutive samples for a single point landed in some other node's cell</exception>
        private static VectorND[] GetRandomWeights_InsideCell_LARGERBUTFAIL(int count, ISOMInput[] inputs, SOMNode[] nodes, int nodeIndex)
        {
            //TODO: May want to get a voronoi, then choose random points within the convex hull (more elegant, but may not be any faster)

            #region calculate rectangle to choose points from

            // Squared distance to the farthest other node.  The projection to double? is what makes "no other
            // nodes" come back as null (a plain double sequence would silently produce 0 and skip the fallback below)
            double? largestDistanceSquared = Enumerable.Range(0, nodes.Length).
                                             Where(o => o != nodeIndex).
                                             Select(o => (double?)((nodes[o].Weights - nodes[nodeIndex].Weights).LengthSquared)).
                                             Max();

            var inputAABB = MathND.GetAABB(inputs.Select(o => o.Weights));

            if (largestDistanceSquared == null)
            {
                // No other nodes to measure against, so use the farther corner of the inputs' box
                largestDistanceSquared = Math.Max(
                    (inputAABB.Item1 - nodes[nodeIndex].Weights).LengthSquared,
                    (inputAABB.Item2 - nodes[nodeIndex].Weights).LengthSquared);
            }

            double radius = Math.Sqrt(largestDistanceSquared.Value);

            var bounds = MathND.GetAABB(new[]
            {
                nodes[nodeIndex].Weights.Select(o => o - radius).ToArray().ToVectorND(),
                nodes[nodeIndex].Weights.Select(o => o + radius).ToArray().ToVectorND(),
                inputAABB.Item1,
                inputAABB.Item2,
            });

            #endregion

            List<VectorND> retVal = new List<VectorND>();

            for (int cntr = 0; cntr < count; cntr++)
            {
                int infiniteLoopDetector = 0;

                //NOTE: Tested several times, and it averaged about 5-15 iterations to find a point
                while (true)
                {
                    VectorND attempt = MathND.GetRandomVector(bounds.Item1, bounds.Item2);

                    // Find the node that this random point is closest to
                    var closest = nodes.
                                  Select((o, i) => new { Index = i, DistSquared = (attempt - o.Weights).LengthSquared }).
                                  OrderBy(o => o.DistSquared).
                                  First();

                    if (closest.Index == nodeIndex)
                    {
                        retVal.Add(attempt);
                        break;
                    }

                    infiniteLoopDetector++;
                    if (infiniteLoopDetector > 1000)
                    {
                        throw new ApplicationException("Infinite loop detected");
                    }
                }
            }

            return retVal.ToArray();
        }