/// <summary>
/// Turns a number into a fixed-width vector by writing it in another base and dividing every digit by that base
/// </summary>
/// <remarks>
/// Intended for mapping a numeric column (dollars, quantities, ...) onto part of a self organizing map input.  The
/// vectors produced for each column of a row are later stitched together into one instance of ISOMInput.Weights
///
/// Typical usage: prequery the data to find the range of possible values, then run the maximum expected value through
/// GetConvertBaseProps() to decide which base to use.  This method scales the value to a long, converts it to that
/// base, then treats each digit as a fraction of the base
///
/// If the number needs more digits than props.Width, every element is saturated to 1 (or -1 when value is negative).
/// If it needs fewer, the result is left-padded with zeros
/// </remarks>
/// <param name="value">The number to convert</param>
/// <param name="props">Supplies Number_ScaleToLong, Number_BaseConvertTo and Width</param>
/// <returns>An array of props.Width elements</returns>
private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
{
    // Scale to an integer, then express it as digits of the target base
    long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

    if (digits.Length > props.Width)
    {
        // Doesn't fit in the available width, so saturate every slot in the direction of the sign
        double saturated = value < 0 ? -1d : 1d;
        return Enumerable.Repeat(saturated, props.Width).ToArray();
    }

    // Each digit becomes a fraction of the base
    double radix = props.Number_BaseConvertTo.Value.ToDouble();

    double[] fractions = new double[digits.Length];
    for (int index = 0; index < digits.Length; index++)
    {
        fractions[index] = digits[index].ToDouble() / radix;
    }

    if (fractions.Length >= props.Width)
    {
        return fractions;
    }

    // Right-justify inside a zero filled array so the result is exactly props.Width long
    double[] padded = new double[props.Width];
    Array.Copy(fractions, 0, padded, padded.Length - fractions.Length, fractions.Length);
    return padded;
}
/// <summary>
/// Converts text into a vector of props.Width elements
/// </summary>
/// <remarks>
/// The work is delegated to three steps: ConvertToVector_Text_Number, ConvertToVector_Text_Normalize, and
/// ConvertToVector_Text_Fit (which receives props.Width and props.Text_Justification)
/// </remarks>
/// <param name="text">The text to convert</param>
/// <param name="props">Supplies Width and Text_Justification</param>
/// <param name="uniqueNonWhitespace">
/// A list of all possible characters that could be encountered (but not any characters that would cause IsWhitespace to return true)
/// </param>
/// <returns>The fitted vector, or all zeros when uniqueNonWhitespace is empty</returns>
private static double[] ConvertToVector_Text(string text, SOMConvertToVectorProps props, char[] uniqueNonWhitespace)
{
    int alphabetSize = uniqueNonWhitespace.Length;

    if (alphabetSize == 0)
    {
        // No known characters, so there is nothing to encode
        return new double[props.Width];
    }

    int[] charIndices = ConvertToVector_Text_Number(text, uniqueNonWhitespace);
    double[] scaled = ConvertToVector_Text_Normalize(charIndices, alphabetSize);

    return ConvertToVector_Text_Fit(scaled, props.Width, props.Text_Justification.Value);
}
/// <summary>
/// Converts the text of a single field into a vector, choosing the conversion based on stats.FieldType
/// </summary>
/// <remarks>
/// Integer/FloatingPoint: null or whitespace text counts as zero, anything else goes through double.Parse (no fallback,
/// so invalid numeric text propagates the parse exception).  The number is then sent to ConvertToVector_LeftSignificant
///
/// DateTime: text that fails DateTime.TryParse is replaced with stats.Date_Min.  The number of days since stats.Date_Min
/// is then sent to ConvertToVector_LeftSignificant
///
/// AlphaNumeric/AnyText: handed to ConvertToVector_Text along with stats.UniqueChars_NonWhitespace
/// </remarks>
/// <param name="text">Raw field value</param>
/// <param name="stats">Describes the field (type, minimum date, known characters)</param>
/// <param name="convertProps">Conversion settings passed through to the type specific converter</param>
/// <exception cref="ApplicationException">stats.FieldType is not one of the handled types</exception>
public static double[] ConvertToVector(string text, SOMFieldStats stats, SOMConvertToVectorProps convertProps)
{
    switch (stats.FieldType)
    {
        case SOMFieldType.Integer:
        case SOMFieldType.FloatingPoint:
            {
                // Blank means zero
                double number = 0d;
                if (!string.IsNullOrWhiteSpace(text))
                {
                    number = double.Parse(text.Trim());
                }

                return ConvertToVector_LeftSignificant(number, convertProps);
            }

        case SOMFieldType.DateTime:
            {
                DateTime earliest = stats.Date_Min.Value;

                // Unparseable dates are treated as the minimum date
                DateTime parsed;
                DateTime moment = DateTime.TryParse(text, out parsed) ? parsed : earliest;

                // convertProps was built from "(stats.Date_Max - stats.Date_Min).TotalDays"
                double elapsedDays = moment.Subtract(earliest).TotalDays;

                return ConvertToVector_LeftSignificant(elapsedDays, convertProps);
            }

        case SOMFieldType.AlphaNumeric:
        case SOMFieldType.AnyText:
            {
                return ConvertToVector_Text(text, convertProps, stats.UniqueChars_NonWhitespace);
            }

        default:
            throw new ApplicationException("finish this: " + stats.FieldType.ToString());
    }
}
/// <summary>
/// Converts a number to a vector where the leftmost digit is reported accurately and every digit to its right is
/// washed toward one
/// </summary>
/// <remarks>
/// The leftmost digit is the most significant, so it is returned as a plain fraction of the base.  The digits to the
/// right don't matter as much, but a self organizing map groups things by the overall pattern of the values, so the
/// digits to the right are made to approach one (think of them as overridden by the digits to the left)
///
/// Example:
/// Without this trend toward one, these two would map close to each other (even though the first could represent 1,
/// and the second could represent 201)
///     0   0   0   0   1
///     0   .1  0   0   1
/// With the trend, they become something like:
///     0   0   0   0   1
///     0   .1  .6  .95 1
///
/// Each digit to the right sits between two curves, x^.1 and x^.04 (x runs from 0 to 1 across the digits to the right of
/// the first).  The digit's magnitude picks how far between the two curves the result lands.  That result is then squeezed
/// into the gap between the first digit's magnitude and one, and takes the sign of the first digit
///
/// Zero digits produce all zeros.  More digits than props.Width produce all 1s (or -1s when value is negative).  Fewer
/// digits are left-padded with zeros
/// </remarks>
/// <param name="value">The number to convert</param>
/// <param name="props">Supplies Number_ScaleToLong, Number_BaseConvertTo and Width</param>
/// <returns>An array of props.Width elements</returns>
private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
{
    const double lowerCurvePower = .1;
    const double upperCurvePower = .04;

    // Scale to an integer, then express it as digits of the target base
    long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

    if (digits.Length == 0)
    {
        // Zero
        return Enumerable.Repeat(0d, props.Width).ToArray();
    }

    if (digits.Length > props.Width)
    {
        // Too big for the available width, saturate in the direction of the sign
        double saturated = value < 0 ? -1d : 1d;
        return Enumerable.Repeat(saturated, props.Width).ToArray();
    }

    double radix = props.Number_BaseConvertTo.Value.ToDouble();
    double[] washed = new double[digits.Length];

    // The leading digit is kept as a straight fraction of the base
    washed[0] = digits[0].ToDouble() / radix;
    double leadMagnitude = Math.Abs(washed[0]);
    bool isNegative = washed[0] < 0;

    int trailingCount = washed.Length - 1;
    if (trailingCount > 0)
    {
        // The curves run from 0 to 1, so x is scaled across the trailing digits and y is scaled into
        // whatever room is left above the leading digit
        double headroom = 1d - leadMagnitude;
        double step = 1d / trailingCount;

        for (int index = 1; index <= trailingCount; index++)
        {
            double lowerCurve = Math.Pow(index * step, lowerCurvePower);
            double upperCurve = Math.Pow(index * step, upperCurvePower);

            // The digit acts like a percent between the two curves
            double blended = UtilityCore.GetScaledValue(lowerCurve, upperCurve, 0, props.Number_BaseConvertTo.Value, Math.Abs(digits[index]));

            double magnitude = blended * headroom + leadMagnitude;

            washed[index] = isNegative ? -magnitude : magnitude;
        }
    }

    if (washed.Length >= props.Width)
    {
        return washed;
    }

    // Right-justify inside a zero filled array so the result is exactly props.Width long
    double[] padded = new double[props.Width];
    Array.Copy(washed, 0, padded, padded.Length - washed.Length, washed.Length);
    return padded;
}
/// <summary>
/// Converts text into a vector of props.Width elements
/// </summary>
/// <remarks>
/// The work is delegated to three steps: ConvertToVector_Text_Number, ConvertToVector_Text_Normalize, and
/// ConvertToVector_Text_Fit (which receives props.Width and props.Text_Justification)
/// </remarks>
/// <param name="text">The text to convert</param>
/// <param name="props">Supplies Width and Text_Justification</param>
/// <param name="uniqueNonWhitespace">
/// A list of all possible characters that could be encountered (but not any characters that would cause IsWhitespace to return true)
/// </param>
/// <returns>The fitted vector, or all zeros when uniqueNonWhitespace is empty</returns>
private static double[] ConvertToVector_Text(string text, SOMConvertToVectorProps props, char[] uniqueNonWhitespace)
{
    int alphabetSize = uniqueNonWhitespace.Length;

    if (alphabetSize == 0)
    {
        // No known characters, so there is nothing to encode
        return new double[props.Width];
    }

    int[] charIndices = ConvertToVector_Text_Number(text, uniqueNonWhitespace);
    double[] scaled = ConvertToVector_Text_Normalize(charIndices, alphabetSize);

    return ConvertToVector_Text_Fit(scaled, props.Width, props.Text_Justification.Value);
}
/// <summary>
/// Converts a number to a vector where the leftmost digit is reported accurately and every digit to its right is
/// washed toward one
/// </summary>
/// <remarks>
/// The leftmost digit is the most significant, so it is returned as a plain fraction of the base.  The digits to the
/// right don't matter as much, but a self organizing map groups things by the overall pattern of the values, so the
/// digits to the right are made to approach one (think of them as overridden by the digits to the left)
///
/// Example:
/// Without this trend toward one, these two would map close to each other (even though the first could represent 1,
/// and the second could represent 201)
///     0   0   0   0   1
///     0   .1  0   0   1
/// With the trend, they become something like:
///     0   0   0   0   1
///     0   .1  .6  .95 1
///
/// Each digit to the right sits between two curves, x^.1 and x^.04 (x runs from 0 to 1 across the digits to the right of
/// the first).  The digit's magnitude picks how far between the two curves the result lands.  That result is then squeezed
/// into the gap between the first digit's magnitude and one, and takes the sign of the first digit
///
/// Zero digits produce all zeros.  More digits than props.Width produce all 1s (or -1s when value is negative).  Fewer
/// digits are left-padded with zeros
/// </remarks>
/// <param name="value">The number to convert</param>
/// <param name="props">Supplies Number_ScaleToLong, Number_BaseConvertTo and Width</param>
/// <returns>An array of props.Width elements</returns>
private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
{
    const double lowerCurvePower = .1;
    const double upperCurvePower = .04;

    // Scale to an integer, then express it as digits of the target base
    long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

    if (digits.Length == 0)
    {
        // Zero
        return Enumerable.Repeat(0d, props.Width).ToArray();
    }

    if (digits.Length > props.Width)
    {
        // Too big for the available width, saturate in the direction of the sign
        double saturated = value < 0 ? -1d : 1d;
        return Enumerable.Repeat(saturated, props.Width).ToArray();
    }

    double radix = props.Number_BaseConvertTo.Value.ToDouble();
    double[] washed = new double[digits.Length];

    // The leading digit is kept as a straight fraction of the base
    washed[0] = digits[0].ToDouble() / radix;
    double leadMagnitude = Math.Abs(washed[0]);
    bool isNegative = washed[0] < 0;

    int trailingCount = washed.Length - 1;
    if (trailingCount > 0)
    {
        // The curves run from 0 to 1, so x is scaled across the trailing digits and y is scaled into
        // whatever room is left above the leading digit
        double headroom = 1d - leadMagnitude;
        double step = 1d / trailingCount;

        for (int index = 1; index <= trailingCount; index++)
        {
            double lowerCurve = Math.Pow(index * step, lowerCurvePower);
            double upperCurve = Math.Pow(index * step, upperCurvePower);

            // The digit acts like a percent between the two curves
            double blended = UtilityCore.GetScaledValue(lowerCurve, upperCurve, 0, props.Number_BaseConvertTo.Value, Math.Abs(digits[index]));

            double magnitude = blended * headroom + leadMagnitude;

            washed[index] = isNegative ? -magnitude : magnitude;
        }
    }

    if (washed.Length >= props.Width)
    {
        return washed;
    }

    // Right-justify inside a zero filled array so the result is exactly props.Width long
    double[] padded = new double[props.Width];
    Array.Copy(washed, 0, padded, padded.Length - washed.Length, washed.Length);
    return padded;
}
/// <summary>
/// Turns a number into a fixed-width vector by writing it in another base and dividing every digit by that base
/// </summary>
/// <remarks>
/// Intended for mapping a numeric column (dollars, quantities, ...) onto part of a self organizing map input.  The
/// vectors produced for each column of a row are later stitched together into one instance of ISOMInput.Weights
///
/// Typical usage: prequery the data to find the range of possible values, then run the maximum expected value through
/// GetConvertBaseProps() to decide which base to use.  This method scales the value to a long, converts it to that
/// base, then treats each digit as a fraction of the base
///
/// If the number needs more digits than props.Width, every element is saturated to 1 (or -1 when value is negative).
/// If it needs fewer, the result is left-padded with zeros
/// </remarks>
/// <param name="value">The number to convert</param>
/// <param name="props">Supplies Number_ScaleToLong, Number_BaseConvertTo and Width</param>
/// <returns>An array of props.Width elements</returns>
private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
{
    // Scale to an integer, then express it as digits of the target base
    long scaled = Convert.ToInt64(value * props.Number_ScaleToLong);
    int[] digits = MathND.ConvertToBase(scaled, props.Number_BaseConvertTo.Value);

    if (digits.Length > props.Width)
    {
        // Doesn't fit in the available width, so saturate every slot in the direction of the sign
        double saturated = value < 0 ? -1d : 1d;
        return Enumerable.Repeat(saturated, props.Width).ToArray();
    }

    // Each digit becomes a fraction of the base
    double radix = props.Number_BaseConvertTo.Value.ToDouble();

    double[] fractions = new double[digits.Length];
    for (int index = 0; index < digits.Length; index++)
    {
        fractions[index] = digits[index].ToDouble() / radix;
    }

    if (fractions.Length >= props.Width)
    {
        return fractions;
    }

    // Right-justify inside a zero filled array so the result is exactly props.Width long
    double[] padded = new double[props.Width];
    Array.Copy(fractions, 0, padded, padded.Length - fractions.Length, fractions.Length);
    return padded;
}
/// <summary>
/// Converts the text of a single field into a vector, choosing the conversion based on stats.FieldType
/// </summary>
/// <remarks>
/// Integer/FloatingPoint: null or whitespace text counts as zero, anything else goes through double.Parse (no fallback,
/// so invalid numeric text propagates the parse exception).  The number is then sent to ConvertToVector_LeftSignificant
///
/// DateTime: text that fails DateTime.TryParse is replaced with stats.Date_Min.  The number of days since stats.Date_Min
/// is then sent to ConvertToVector_LeftSignificant
///
/// AlphaNumeric/AnyText: handed to ConvertToVector_Text along with stats.UniqueChars_NonWhitespace
/// </remarks>
/// <param name="text">Raw field value</param>
/// <param name="stats">Describes the field (type, minimum date, known characters)</param>
/// <param name="convertProps">Conversion settings passed through to the type specific converter</param>
/// <exception cref="ApplicationException">stats.FieldType is not one of the handled types</exception>
public static double[] ConvertToVector(string text, SOMFieldStats stats, SOMConvertToVectorProps convertProps)
{
    switch (stats.FieldType)
    {
        case SOMFieldType.Integer:
        case SOMFieldType.FloatingPoint:
            {
                // Blank means zero
                double number = 0d;
                if (!string.IsNullOrWhiteSpace(text))
                {
                    number = double.Parse(text.Trim());
                }

                return ConvertToVector_LeftSignificant(number, convertProps);
            }

        case SOMFieldType.DateTime:
            {
                DateTime earliest = stats.Date_Min.Value;

                // Unparseable dates are treated as the minimum date
                DateTime parsed;
                DateTime moment = DateTime.TryParse(text, out parsed) ? parsed : earliest;

                // convertProps was built from "(stats.Date_Max - stats.Date_Min).TotalDays"
                double elapsedDays = moment.Subtract(earliest).TotalDays;

                return ConvertToVector_LeftSignificant(elapsedDays, convertProps);
            }

        case SOMFieldType.AlphaNumeric:
        case SOMFieldType.AnyText:
            {
                return ConvertToVector_Text(text, convertProps, stats.UniqueChars_NonWhitespace);
            }

        default:
            throw new ApplicationException("finish this: " + stats.FieldType.ToString());
    }
}
/// <summary>
/// Pairs each column with the field stats that will be used for it and the matching conversion props
/// </summary>
/// <remarks>
/// A column flagged ForceText uses its FieldStatsText, otherwise its FieldStats.  The conversion props depend on the
/// chosen field's type:
///     Text types: built from the column's Width and Override_Text_Justify (TextAlignment.Center when not overridden)
///     DateTime: SelfOrganizingMapsDB.GetConvertToProps with the field's Date_Min/Date_Max and the column's Width
///     Numeric types: SelfOrganizingMapsDB.GetConvertToProps with the field's Numeric_Min/Numeric_Max and the column's Width
/// </remarks>
/// <param name="columns">The columns to process</param>
/// <returns>One tuple per column, in the same order as the input</returns>
/// <exception cref="ApplicationException">A field's type isn't one of the handled types</exception>
private static Tuple<ColumnStats, SOMFieldStats, SOMConvertToVectorProps>[] GetFinalColumnStats(ColumnStats[] columns)
{
    var results = new Tuple<ColumnStats, SOMFieldStats, SOMConvertToVectorProps>[columns.Length];

    for (int index = 0; index < results.Length; index++)
    {
        ColumnStats column = columns[index];

        // Pick which set of stats describes this column
        SOMFieldStats field = column.ForceText ? column.FieldStatsText : column.FieldStats;

        //TODO: look at overrides

        // Work out how this column's values get turned into vectors
        SOMConvertToVectorProps convertProps;
        switch (field.FieldType)
        {
            case SOMFieldType.Integer:
            case SOMFieldType.FloatingPoint:
                convertProps = SelfOrganizingMapsDB.GetConvertToProps(field.Numeric_Min.Value, field.Numeric_Max.Value, column.Width);
                break;

            case SOMFieldType.DateTime:
                convertProps = SelfOrganizingMapsDB.GetConvertToProps(field.Date_Min.Value, field.Date_Max.Value, column.Width);
                break;

            case SOMFieldType.AnyText:
            case SOMFieldType.AlphaNumeric:
                convertProps = new SOMConvertToVectorProps(column.Width, column.Override_Text_Justify ?? TextAlignment.Center);
                break;

            default:
                throw new ApplicationException("Unknown SOMFieldType: " + field.FieldType.ToString());
        }

        results[index] = Tuple.Create(column, field, convertProps);
    }

    return results;
}