/// <summary>
/// This converts the value into a normalized vector (values from -1 to 1 in each dimension)
/// </summary>
/// <remarks>
/// This is useful if you want to convert numbers into vectors
///
/// Say you want to do a SOM against a database.  Each column needs to be mapped to a vector.  Then all vectors of a row will get
/// stitched together to be one instance of ISOMInput.Weights
///
/// If one of the columns is numeric (maybe dollars or quantities), then you would use this method
///
/// The first step would be to prequery to see what the range of possible values are.  Run that maximum expected value through
/// GetConvertBaseProps() to figure out what base to represent the numbers as.  This method converts the number to that base,
/// then normalizes each digit to -1 to 1 (sort of like percent of base)
/// </remarks>
private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
{
    // Scale up, then express as digits of the base chosen by GetConvertBaseProps
    long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

    // The number needs more digits than the vector has room for, so saturate every slot at +/-1
    if (digits.Length > props.Width)
    {
        double saturated = value < 0 ? -1d : 1d;

        double[] clipped = new double[props.Width];
        for (int cntr = 0; cntr < clipped.Length; cntr++)
        {
            clipped[cntr] = saturated;
        }

        return clipped;
    }

    // Each digit becomes a percent of the base (-1 to 1)
    double baseDbl = props.Number_BaseConvertTo.Value.ToDouble();

    // Left-pad with zeros so the return is always props.Width long
    double[] retVal = new double[props.Width];
    int offset = props.Width - digits.Length;

    for (int cntr = 0; cntr < digits.Length; cntr++)
    {
        retVal[offset + cntr] = digits[cntr].ToDouble() / baseDbl;
    }

    return retVal;
}
/// <summary>
/// Repositions the low dimension node positions so their spacing approximates the distances
/// between the high dimension weights (nodes with similar weights end up near each other)
/// </summary>
public static SOMResult ArrangeNodes_LikesAttract(SOMResult result)
{
    // Distances between every pair of nodes in the high dimension weight space
    var desiredDistances = MathND.GetDistancesBetween(result.Nodes.Select(o => o.Weights).ToArray());

    // Collapse nodes that sit at the same high dimension position, then recompute the distances
    if (MergeTouchingNodes(ref result, desiredDistances))
    {
        desiredDistances = MathND.GetDistancesBetween(result.Nodes.Select(o => o.Weights).ToArray());
    }

    // Spring-relax the low dimension positions toward those desired distances
    //NOTE: This has no effect on InputsByNode (those are high dimension)
    SOMNode[] movedNodes = MoveNodes_BallOfSprings(result.Nodes, desiredDistances, 1500);

    return new SOMResult(movedNodes, result.InputsByNode, result.IncludesEmptyNodes);
}
/// <summary>
/// This gets the bounding box of all the input values, then creates random vectors within that box
/// </summary>
private static VectorND[] GetRandomNodeWeights(int count, ISOMInput[] inputs)
{
    // Bounding box of the inputs, grown 10% so the random points can land slightly outside it
    var aabb = MathND.ResizeAABB(MathND.GetAABB(inputs.Select(o => o.Weights)), 1.1);

    VectorND[] retVal = new VectorND[count];

    for (int cntr = 0; cntr < count; cntr++)
    {
        retVal[cntr] = MathND.GetRandomVector(aabb.Item1, aabb.Item2);
    }

    return retVal;
}
/// <summary>
/// Core self organizing map training loop: repeatedly shows the inputs (in random order) to the node set,
/// pulling the best matching node and its neighbors toward each input
/// </summary>
/// <param name="nodes">Starting nodes.  These are cloned, the caller's instances aren't modified</param>
/// <param name="inputs">The inputs to train against</param>
/// <param name="rules">Iteration count, learning rate, radius/attraction settings</param>
/// <param name="returnEmptyNodes">When false, nodes that ended up with no inputs are stripped from the result</param>
private static SOMResult TrainSOM(SOMNode[] nodes, ISOMInput[] inputs, SOMRules rules, bool returnEmptyNodes = false)
{
    // Radius of the bounding box around the starting weights (scales the neighborhood search radius below)
    double mapRadius = MathND.GetRadius(MathND.GetAABB(nodes.Select(o => o.Weights)));

    // Train against clones so the caller's nodes stay untouched
    SOMNode[] returnNodes = nodes.
        Select(o => o.Clone()).
        ToArray();

    //NOTE(review): if mapRadius <= 1, Math.Log is <= 0, making timeConstant zero or negative (the search radius
    // would grow instead of decay).  Presumably the weights always span more than that -- TODO confirm
    double timeConstant = rules.NumIterations / Math.Log(mapRadius);

    int iteration = 0;
    int remainingIterations = rules.NumIterations;

    while (remainingIterations > 0)
    {
        // Only take as many inputs as there are iterations left (the last pass may be partial)
        foreach (ISOMInput input in UtilityCore.RandomOrder(inputs, Math.Min(remainingIterations, inputs.Length)))
        {
            // Find closest node
            SOMNode closest = GetClosest(returnNodes, input).Item1;

            // Find other affected nodes (a node and distance squared).  The radius shrinks exponentially as iterations go by
            double searchRadius = mapRadius * rules.InitialRadiusPercent * Math.Exp(-iteration / timeConstant);
            Tuple<SOMNode, double>[] neigbors = GetNeighbors(returnNodes, closest, searchRadius);

            // Learning rate also decays exponentially over the run
            double learningRate = rules.LearningRate * Math.Exp(-(double)iteration / (double)rules.NumIterations);

            // Adjust the matched node (full learning rate)
            AdjustNodeWeights(closest, input.Weights, learningRate);

            foreach (var node in neigbors)
            {
                double influence = GetInfluence(rules.AttractionFunction, node.Item2, searchRadius);

                // Adjust a neighbor (learning rate scaled down by its distance from the winner)
                AdjustNodeWeights(node.Item1, input.Weights, learningRate * influence);
            }

            iteration++;
        }

        remainingIterations -= inputs.Length;
    }

    // See which inputs go with which nodes
    ISOMInput[][] inputsByNode = GetInputsByNode(returnNodes, inputs);

    SOMResult retVal = new SOMResult(returnNodes, inputsByNode, true);

    if (!returnEmptyNodes)
    {
        retVal = RemoveZeroNodes(retVal);
    }

    return (retVal);
}
/// <summary>
/// This is similar logic as standard deviation, but this returns the max distance from the average
/// NOTE: Spread is probably the wrong word, since this only returns the max distance (radius instead of diameter)
/// </summary>
/// <param name="values">The points to measure.  Must contain at least one item (throws InvalidOperationException on empty, same as before)</param>
/// <returns>The distance from the center of the points to the farthest point</returns>
public static double GetTotalSpread(IEnumerable<VectorND> values)
{
    // Materialize once -- the original enumerated the sequence twice (GetCenter, then the distance query),
    // which re-runs any deferred query the caller passed in
    VectorND[] valueArr = values.ToArray();

    VectorND mean = MathND.GetCenter(valueArr);

    //NOTE: The original called OrderByDescending() with no key selector -- no such linq overload exists, so it
    // couldn't compile.  Max is the intent (largest distance squared from the mean), and avoids the sort
    double maxDistanceSquared = valueArr.
        Max(o => (o - mean).LengthSquared);

    return Math.Sqrt(maxDistanceSquared);
}
/// <summary>
/// This version starts with a SOM, then potentially splits the largest node and/or gathers the smallest nodes into a single
/// </summary>
/// <returns>A result whose node set has been rebalanced (large nodes split via kmeans, leftover small nodes merged into one)</returns>
public static SOMResult Train(ISOMInput[] inputs, SOMRules rules, bool isDisplay2D)
{
    SOMResult result = SelfOrganizingMaps.TrainSOM(inputs, rules, isDisplay2D);

    if (result.Nodes.Length == 0)
    {
        // Nothing to rebalance
        return (result);
    }
    else if (result.Nodes.Length == 1)
    {
        #region kmeans single node

        // Everything mapped to a single node.  With enough inputs, break them apart with kmeans instead
        if (inputs.Length < 20)
        {
            return (result);
        }

        return (SelfOrganizingMaps.TrainKMeans(inputs, 5, true));

        #endregion
    }

    // Sort the nodes into: split with kmeans / keep as-is / merge the remainder
    var categorized = GetSOM_SplitMerge_Categorize(result);

    List<SOMNode> nodes = new List<SOMNode>();
    List<ISOMInput[]> newInputs = new List<ISOMInput[]>();

    // The kmeans-split and keeper nodes carry straight over
    foreach (NodeCombo set in UtilityCore.Iterate(categorized.kmeans, categorized.keep))       // UtilityCore.Iterate gracefully skips nulls
    {
        nodes.Add(set.Node);
        newInputs.Add(set.Inputs);
    }

    // The leftover small nodes become one combined node at their average position/weight
    if (categorized.remaining != null)
    {
        nodes.Add(new SOMNode()
        {
            Position = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Position)),
            Weights = MathND.GetCenter(categorized.remaining.Select(o => o.Node.Weights)),
        });

        newInputs.Add(categorized.remaining.
            SelectMany(o => o.Inputs).
            ToArray());
    }

    return (new SOMResult(nodes.ToArray(), newInputs.ToArray(), false));
}
/// <summary>
/// This pulls the low dimension positions toward ideal configurations based on the corresponding high
/// dimension relationships (the links between nodes act like springs)
/// </summary>
private static SOMNode[] MoveNodes_BallOfSprings(SOMNode[] nodes, Tuple<int, int, double>[] desiredDistances, int numIterations)
{
    // Pull out the current low dimension positions
    VectorND[] positions = new VectorND[nodes.Length];
    for (int cntr = 0; cntr < nodes.Length; cntr++)
    {
        positions[cntr] = nodes[cntr].Position;
    }

    // Relax the positions toward the desired distances
    positions = MathND.ApplyBallOfSprings(positions, desiredDistances, numIterations);

    // Rebuild nodes (same weights, new positions)
    SOMNode[] retVal = new SOMNode[nodes.Length];
    for (int cntr = 0; cntr < nodes.Length; cntr++)
    {
        retVal[cntr] = new SOMNode()
        {
            Weights = nodes[cntr].Weights,
            Position = positions[cntr],
        };
    }

    return retVal;
}
/// <summary>
/// This does a bounding box of inputs, and also makes sure all positions are closer to the desired node than
/// other nodes
/// </summary>
private static VectorND[] GetRandomWeights_InsideCell(int count, ISOMInput[] inputs, SOMNode[] nodes, int nodeIndex)
{
    var inputAABB = MathND.GetAABB(inputs.Select(o => o.Weights));

    //NOTE: The node could legitimately sit outside the input's aabb, so that isn't validated here.  The attempt
    // cap below catches the case where this box can't produce points nearest to this node

    VectorND[] retVal = new VectorND[count];

    for (int cntr = 0; cntr < count; cntr++)
    {
        int attempts = 0;

        while (true)
        {
            VectorND candidate = MathND.GetRandomVector(inputAABB.Item1, inputAABB.Item2);

            // Find which node this candidate is closest to (first index wins ties, same as a stable sort would)
            int closestIndex = -1;
            double closestDistSquared = double.MaxValue;

            for (int inner = 0; inner < nodes.Length; inner++)
            {
                double distSquared = (candidate - nodes[inner].Weights).LengthSquared;
                if (distSquared < closestDistSquared)
                {
                    closestDistSquared = distSquared;
                    closestIndex = inner;
                }
            }

            if (closestIndex == nodeIndex)
            {
                retVal[cntr] = candidate;
                break;
            }

            attempts++;
            if (attempts > 1000)
            {
                // Instead of giving up, increase the range that the weights can exist in.  When testing this other method, there is almost never an improved
                // node.  It's just an infinite loop in the caller (keeps trying for an improvement, but it never happens)
                return GetRandomWeights_InsideCell_LARGERBUTFAIL(count, inputs, nodes, nodeIndex);
            }
        }
    }

    return retVal;
}
/// <summary>
/// Moves each node's weights to the centroid of the inputs assigned to it (the kmeans update step)
/// </summary>
/// <exception cref="ArgumentException">The two arrays are different lengths</exception>
private static void AdjustKMeansCenters(SOMNode[] nodes, ISOMInput[][] inputsByNode)
{
    if (nodes.Length != inputsByNode.Length)
    {
        throw new ArgumentException("Arrays must be the same size");
    }

    for (int index = 0; index < nodes.Length; index++)
    {
        ISOMInput[] assigned = inputsByNode[index];

        // A node can end up with no inputs when there are a bunch of identical inputs.  Leave its weights alone in that case
        if (assigned.Length > 0)
        {
            nodes[index].Weights = MathND.GetCenter(assigned.Select(o => o.Weights));
        }
    }
}
/// <summary>
/// Returns the portion of this vector that lies along the other vector
/// NOTE: The return will be the same direction as alongVector, but the length from zero to this vector's full length
/// </summary>
/// <remarks>
/// This is copied from the Vector3D version
/// </remarks>
/// <param name="eitherDirection">When false, a projection pointing opposite to alongVector is reported as zero</param>
public static VectorND GetProjectedVector(this VectorND vector, VectorND alongVector, bool eitherDirection = true)
{
    // c = (a dot unit(b)) * unit(b)
    if (!vector.IsNearZero() && !alongVector.IsNearZero())
    {
        VectorND alongUnit = alongVector.ToUnit();
        double projectedLength = VectorND.DotProduct(vector, alongUnit);

        // Only return the projection when it points with alongVector, or the caller allows either direction
        if (eitherDirection || projectedLength >= 0)
        {
            return alongUnit * projectedLength;
        }
    }

    // Degenerate input, or the projection points the wrong way
    return MathND.GetZeroVector(vector, alongVector);
}
/// <summary>
/// This washes the bits to the right with values approaching one
/// </summary>
/// <remarks>
/// The leftmost bit is most significant, and needs to be returned accurately.  The bits to the right don't matter as much, but
/// the self organizing map just groups things together based on the pattern of the bits.  So the bits to the right need to approach
/// one (think of them as overridden by the bits to the left)
///
/// I didn't want linear, I wanted something faster.  So the bits to the right follow a sqrt curve (x axis scaled between 0 and 1
/// over the remaining bits)
///
/// Example:
/// If this trend toward one isn't there, then these two values would map close to each other (even though the first one represents
/// 1, and the second could represent 201)
///  0 0 0 0 1
///  0 .1 0 0 1
/// This method would turn these into something like:
///  0 0 0 0 1
///  0 .1 .6 .95 1 --- bits to the right follow a sqrt toward 1
///
/// Instead of sqrt, it's actually between x^POWMIN and x^POWMAX.  The value of the bit becomes a percent from min to max
/// </remarks>
private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
{
    // Exponents for the two bounding curves.  POWMAX < POWMIN, so for x in (0,1), x^POWMAX sits above x^POWMIN
    const double POWMIN = .1;
    const double POWMAX = .04;

    // Convert to a different base
    long scaledValue = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] converted = MathND.ConvertToBase(scaledValue, props.Number_BaseConvertTo.Value);

    if (converted.Length == 0)
    {
        // Zero, return 0s
        return (Enumerable.Range(0, props.Width).Select(o => 0d).ToArray());
    }
    else if (converted.Length > props.Width)
    {
        // Too big, return 1s (negative values saturate at -1)
        double maxValue = value < 0 ?
            -1d : 1d;
        return (Enumerable.Range(0, props.Width).Select(o => maxValue).ToArray());
    }

    // Normalize so it's between -1 and 1
    double[] normalized = new double[converted.Length];
    double baseDbl = props.Number_BaseConvertTo.Value.ToDouble();

    // Leftmost bit (kept accurate -- just a percent of the base)
    normalized[0] = converted[0].ToDouble() / baseDbl;

    double absFirst = Math.Abs(normalized[0]);

    // Bits to the right of the leftmost (their values are made to approach 1)
    if (converted.Length > 1)
    {
        // The sqrt-like curves are between 0 and 1, so scale the x and y into the space above absFirst
        double yGap = 1d - absFirst;
        double xScale = 1d / (normalized.Length - 1);

        for (int cntr = 1; cntr < normalized.Length; cntr++)
        {
            // Y will be between these two curves
            double yMin = Math.Pow(cntr * xScale, POWMIN);
            double yMax = Math.Pow(cntr * xScale, POWMAX);

            // Treat this bit like a percent between the two curves
            double y = UtilityCore.GetScaledValue(yMin, yMax, 0, props.Number_BaseConvertTo.Value, Math.Abs(converted[cntr]));

            // Squeeze into the gap above the leading bit, and carry its sign
            y *= yGap;
            y += absFirst;

            if (normalized[0] < 0)
            {
                y = -y;
            }

            normalized[cntr] = y;
        }
    }

    // Return, make sure the array is the right size (left-pad with zeros)
    if (normalized.Length < props.Width)
    {
        return (Enumerable.Range(0, props.Width - normalized.Length).
            Select(o => 0d).
            Concat(normalized).
            ToArray());
    }
    else
    {
        return (normalized);
    }
}
/// <summary>
/// Element by element near-equality test (delegates to MathND.IsNearValue against the raw arrays)
/// </summary>
public static bool IsNearValue(this VectorND vector, VectorND other)
{
    double[] left = vector.VectorArray;
    double[] right = other.VectorArray;

    return MathND.IsNearValue(left, right);
}
/// <summary>
/// Only call this from the other overload.  This uses a much larger bounding box (it still makes sure all returned points
/// are closer to the desired node than other nodes)
/// </summary>
/// <remarks>
/// TODO: May want to get a voronoi, then choose random points within the convex hull (more elegant, but may not be any faster)
///
/// This works, but the caller generally never finds a solution that's better than what it already has.  So if you need
/// to use this overload, you're just going to spin the processor
/// </remarks>
/// <exception cref="ApplicationException">Couldn't find a qualifying point after 1000 attempts</exception>
private static VectorND[] GetRandomWeights_InsideCell_LARGERBUTFAIL(int count, ISOMInput[] inputs, SOMNode[] nodes, int nodeIndex)
{
    #region calculate rectangle to choose points from

    // Largest distance (squared for now) from this node to any other node.
    //NOTE: The cast to double? is the fix.  The original selected plain doubles, so FirstOrDefault() on an empty
    // sequence returned 0 instead of null -- the "no other nodes" fallback below was dead code and the box never
    // got enlarged from the aabb corners.  Max over a nullable sequence returns null when empty
    double? largestDistance = Enumerable.Range(0, nodes.Length).
        Where(o => o != nodeIndex).
        Select(o => (double?)(nodes[o].Weights - nodes[nodeIndex].Weights).LengthSquared).
        Max();

    var inputAABB = MathND.GetAABB(inputs.Select(o => o.Weights));

    if (largestDistance == null)
    {
        // This is the only node.  Use the farther of the two input aabb corners instead
        largestDistance = Math.Max(
            (inputAABB.Item1 - nodes[nodeIndex].Weights).LengthSquared,
            (inputAABB.Item2 - nodes[nodeIndex].Weights).LengthSquared);
    }

    largestDistance = Math.Sqrt(largestDistance.Value);

    // A box centered on this node, largestDistance in each direction, unioned with the input aabb
    var bounds = MathND.GetAABB(new[]
    {
        nodes[nodeIndex].Weights.Select(o => o - largestDistance.Value).ToArray().ToVectorND(),
        nodes[nodeIndex].Weights.Select(o => o + largestDistance.Value).ToArray().ToVectorND(),
        inputAABB.Item1,
        inputAABB.Item2,
    });

    #endregion

    List<VectorND> retVal = new List<VectorND>();

    for (int cntr = 0; cntr < count; cntr++)
    {
        int infiniteLoopDetector = 0;

        while (true)
        {
            VectorND attempt = MathND.GetRandomVector(bounds.Item1, bounds.Item2);

            // Keep the point only if this node is the one it's closest to
            var closest = nodes.
                Select((o, i) => new { Index = i, DistSquared = (attempt - o.Weights).LengthSquared }).
                OrderBy(o => o.DistSquared).
                First();

            if (closest.Index == nodeIndex)
            {
                retVal.Add(attempt);
                break;
            }

            infiniteLoopDetector++;
            if (infiniteLoopDetector > 1000)
            {
                throw new ApplicationException("Infinite loop detected");
            }
        }
    }

    return retVal.ToArray();
}