예제 #1
0
        /// <summary>
        /// Turns a number into a fixed width vector by writing it in another base and dividing each digit by that base
        /// </summary>
        /// <remarks>
        /// Intended use: when running a SOM against database rows, each column is mapped to its own vector, and the vectors of a
        /// row are stitched together into one instance of ISOMInput.Weights.  Numeric columns (dollars, quantities, etc) can go
        /// through this method
        ///
        /// Prequery to find the range of possible values, then run the maximum expected value through GetConvertBaseProps() to
        /// decide which base to use
        ///
        /// The returned array always has props.Width elements:
        ///     - More digits than props.Width: every element is 1 (or -1 when value is negative)
        ///     - Fewer digits than props.Width: the digits are right aligned and the left side is filled with zeros
        ///
        /// NOTE(review): each element is only within -1 to 1 if MathND.ConvertToBase returns digits whose magnitude is less than
        /// the base -- confirm against that helper
        /// </remarks>
        /// <param name="value">The number to convert.  It gets multiplied by props.Number_ScaleToLong and converted to a long first</param>
        /// <param name="props">Supplies the output width, the scale and the base</param>
        /// <returns>An array with props.Width elements</returns>
        private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
        {
            // Express the scaled value as digits of the configured base
            long asLong = Convert.ToInt64(value * props.Number_ScaleToLong);
            var numberBase = props.Number_BaseConvertTo.Value;
            int[] digits = MathND.ConvertToBase(asLong, numberBase);

            int width = props.Width;
            int digitCount = digits.Length;

            if (digitCount > width)
            {
                // Doesn't fit in the requested width, so saturate in the direction of the sign
                return Enumerable.Repeat(value < 0 ? -1d : 1d, width).ToArray();
            }

            // At this point digitCount <= width, so the result is exactly width long.  A new array starts out as all zeros, which
            // takes care of the padding on the left
            double[] retVal = new double[width];
            int offset = width - digitCount;

            // Each digit becomes a fraction of the base
            double divisor = numberBase.ToDouble();

            for (int index = 0; index < digitCount; index++)
            {
                retVal[offset + index] = digits[index].ToDouble() / divisor;
            }

            return retVal;
        }
예제 #2
0
        /// <summary>
        /// Converts a string into a vector that is props.Width wide
        /// </summary>
        /// <remarks>
        /// The work is delegated to three helpers, in order: ConvertToVector_Text_Number, ConvertToVector_Text_Normalize and
        /// ConvertToVector_Text_Fit (which receives props.Width and props.Text_Justification)
        /// </remarks>
        /// <param name="text">The text to convert</param>
        /// <param name="props">Supplies the output width and the text justification</param>
        /// <param name="uniqueNonWhitespace">
        /// Every character that could be encountered, excluding anything that IsWhitespace would return true for.  When this is
        /// empty, the result is an array of zeros
        /// </param>
        /// <returns>The vector for the text, or props.Width zeros when there are no known characters</returns>
        private static double[] ConvertToVector_Text(string text, SOMConvertToVectorProps props, char[] uniqueNonWhitespace)
        {
            int knownCharCount = uniqueNonWhitespace.Length;

            if (knownCharCount != 0)
            {
                int[] asNumbers = ConvertToVector_Text_Number(text, uniqueNonWhitespace);

                double[] asNormalized = ConvertToVector_Text_Normalize(asNumbers, knownCharCount);

                return ConvertToVector_Text_Fit(asNormalized, props.Width, props.Text_Justification.Value);
            }

            // No known characters, so there is nothing to map the text onto (a new array is all zeros)
            return new double[props.Width];
        }
예제 #3
0
        /// <summary>
        /// Converts the raw text of a field into a vector, picking the conversion based on stats.FieldType
        /// </summary>
        /// <remarks>
        /// Integer / FloatingPoint: the text is parsed with double.Parse (null or whitespace text counts as zero) and sent through
        /// ConvertToVector_LeftSignificant.  Nothing is caught here, so a parse failure on non blank text propagates to the caller
        ///
        /// DateTime: the text is parsed with DateTime.TryParse, falling back to stats.Date_Min when that fails.  The number of days
        /// between that date and stats.Date_Min is then sent through ConvertToVector_LeftSignificant
        ///
        /// AlphaNumeric / AnyText: handed to ConvertToVector_Text along with stats.UniqueChars_NonWhitespace
        /// </remarks>
        /// <param name="text">The raw field value</param>
        /// <param name="stats">Describes the field (type, minimum date, known characters)</param>
        /// <param name="convertProps">Conversion settings that get passed along to the type specific converter</param>
        /// <returns>The vector produced by the type specific converter</returns>
        /// <exception cref="ApplicationException">stats.FieldType isn't one of the types listed above</exception>
        public static double[] ConvertToVector(string text, SOMFieldStats stats, SOMConvertToVectorProps convertProps)
        {
            var fieldType = stats.FieldType;

            // Numeric
            if (fieldType == SOMFieldType.Integer || fieldType == SOMFieldType.FloatingPoint)
            {
                double number = 0d;
                if (!string.IsNullOrWhiteSpace(text))
                {
                    number = double.Parse(text.Trim());
                }

                return ConvertToVector_LeftSignificant(number, convertProps);
            }

            // Date
            if (fieldType == SOMFieldType.DateTime)
            {
                DateTime minDate = stats.Date_Min.Value;

                DateTime parsed;
                if (!DateTime.TryParse(text, out parsed))
                {
                    // Unreadable dates are treated as the minimum date, which makes the offset below zero
                    parsed = minDate;
                }

                // convertProps was built from "(stats.Date_Max - stats.Date_Min).TotalDays", so this uses the same units
                double daysPastMin = (parsed - minDate).TotalDays;

                return ConvertToVector_LeftSignificant(daysPastMin, convertProps);
            }

            // Text
            if (fieldType == SOMFieldType.AlphaNumeric || fieldType == SOMFieldType.AnyText)
            {
                return ConvertToVector_Text(text, convertProps, stats.UniqueChars_NonWhitespace);
            }

            throw new ApplicationException("finish this: " + fieldType.ToString());
        }
예제 #4
0
        /// <summary>
        /// Converts a number into a vector where the leading digit is kept exact and the remaining digits are pushed toward one
        /// </summary>
        /// <remarks>
        /// The leftmost digit is the most significant, so it needs to be represented accurately.  The digits to its right matter
        /// less, but a self organizing map only groups items by the pattern of the values.  So the digits to the right are made to
        /// approach one (think of them as overridden by the digits to their left)
        ///
        /// Example:
        ///     Without that trend, these two would map close to each other, even though the first represents 1 and the second
        ///     could represent 201
        ///         0 0 0 0 1
        ///         0 .1 0 0 1
        ///     This method turns them into something like
        ///         0 0 0 0 1
        ///         0 .1 .6 .95 1
        ///
        /// A linear ramp toward one was too slow.  Each trailing digit sits between the curves x^.1 and x^.04, where x runs from
        /// 0 to 1 across the trailing positions, and the digit itself decides how far between the two curves the value lands.
        /// That is then squeezed into the gap between the leading digit's magnitude and one, and given the leading digit's sign
        ///
        /// The returned array always has props.Width elements:
        ///     - No digits at all (zero): all zeros
        ///     - More digits than props.Width: every element is 1 (or -1 when value is negative)
        ///     - Otherwise: the digits are right aligned and the left side is filled with zeros
        ///
        /// NOTE(review): this relies on MathND.ConvertToBase returning the most significant digit first and an empty array for
        /// zero, and on UtilityCore.GetScaledValue mapping the 0..base range onto the two curve values -- confirm against those helpers
        /// </remarks>
        /// <param name="value">The number to convert.  It gets multiplied by props.Number_ScaleToLong and converted to a long first</param>
        /// <param name="props">Supplies the output width, the scale and the base</param>
        /// <returns>An array with props.Width elements</returns>
        private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
        {
            // Exponents of the two bounding curves.  For x between 0 and 1, x^.1 is the lower curve and x^.04 is the upper curve
            const double POW_LOWERCURVE = .1;
            const double POW_UPPERCURVE = .04;

            // Express the scaled value as digits of the configured base
            long asLong = Convert.ToInt64(value * props.Number_ScaleToLong);
            var numberBase = props.Number_BaseConvertTo.Value;
            int[] digits = MathND.ConvertToBase(asLong, numberBase);

            int width = props.Width;
            int digitCount = digits.Length;

            if (digitCount == 0)
            {
                // No digits means the value is zero
                return Enumerable.Repeat(0d, width).ToArray();
            }

            if (digitCount > width)
            {
                // Doesn't fit in the requested width, so saturate in the direction of the sign
                return Enumerable.Repeat(value < 0 ? -1d : 1d, width).ToArray();
            }

            // At this point 1 <= digitCount <= width, so the result is exactly width long.  A new array starts out as all zeros,
            // which takes care of the padding on the left
            double[] retVal = new double[width];
            int offset = width - digitCount;

            // Leading digit: a plain fraction of the base
            double divisor = numberBase.ToDouble();
            double leading = digits[0].ToDouble() / divisor;
            double leadingMagnitude = Math.Abs(leading);
            bool isNegative = leading < 0;

            retVal[offset] = leading;

            // Trailing digits: these climb from the leading magnitude toward one
            double headroom = 1d - leadingMagnitude;
            double xStep = 1d / (digitCount - 1);       // infinity when there is a single digit, but then the loop never runs

            for (int index = 1; index < digitCount; index++)
            {
                double x = index * xStep;

                double lower = Math.Pow(x, POW_LOWERCURVE);
                double upper = Math.Pow(x, POW_UPPERCURVE);

                // The digit picks a spot between the two curves
                double between = UtilityCore.GetScaledValue(lower, upper, 0, numberBase, Math.Abs(digits[index]));

                // Squeeze into the gap above the leading digit
                double magnitude = leadingMagnitude + (between * headroom);

                retVal[offset + index] = isNegative ? -magnitude : magnitude;
            }

            return retVal;
        }
        /// <summary>
        /// Converts a string into a vector that is props.Width wide
        /// </summary>
        /// <remarks>
        /// The work is delegated to three helpers, in order: ConvertToVector_Text_Number, ConvertToVector_Text_Normalize and
        /// ConvertToVector_Text_Fit (which receives props.Width and props.Text_Justification)
        /// </remarks>
        /// <param name="text">The text to convert</param>
        /// <param name="props">Supplies the output width and the text justification</param>
        /// <param name="uniqueNonWhitespace">
        /// Every character that could be encountered, excluding anything that IsWhitespace would return true for.  When this is
        /// empty, the result is an array of zeros
        /// </param>
        /// <returns>The vector for the text, or props.Width zeros when there are no known characters</returns>
        private static double[] ConvertToVector_Text(string text, SOMConvertToVectorProps props, char[] uniqueNonWhitespace)
        {
            int knownCharCount = uniqueNonWhitespace.Length;

            if (knownCharCount != 0)
            {
                int[] asNumbers = ConvertToVector_Text_Number(text, uniqueNonWhitespace);

                double[] asNormalized = ConvertToVector_Text_Normalize(asNumbers, knownCharCount);

                return ConvertToVector_Text_Fit(asNormalized, props.Width, props.Text_Justification.Value);
            }

            // No known characters, so there is nothing to map the text onto (a new array is all zeros)
            return new double[props.Width];
        }
        /// <summary>
        /// Converts a number into a vector where the leading digit is kept exact and the remaining digits are pushed toward one
        /// </summary>
        /// <remarks>
        /// The leftmost digit is the most significant, so it needs to be represented accurately.  The digits to its right matter
        /// less, but a self organizing map only groups items by the pattern of the values.  So the digits to the right are made to
        /// approach one (think of them as overridden by the digits to their left)
        ///
        /// Example:
        ///     Without that trend, these two would map close to each other, even though the first represents 1 and the second
        ///     could represent 201
        ///         0 0 0 0 1
        ///         0 .1 0 0 1
        ///     This method turns them into something like
        ///         0 0 0 0 1
        ///         0 .1 .6 .95 1
        ///
        /// A linear ramp toward one was too slow.  Each trailing digit sits between the curves x^.1 and x^.04, where x runs from
        /// 0 to 1 across the trailing positions, and the digit itself decides how far between the two curves the value lands.
        /// That is then squeezed into the gap between the leading digit's magnitude and one, and given the leading digit's sign
        ///
        /// The returned array always has props.Width elements:
        ///     - No digits at all (zero): all zeros
        ///     - More digits than props.Width: every element is 1 (or -1 when value is negative)
        ///     - Otherwise: the digits are right aligned and the left side is filled with zeros
        ///
        /// NOTE(review): this relies on MathND.ConvertToBase returning the most significant digit first and an empty array for
        /// zero, and on UtilityCore.GetScaledValue mapping the 0..base range onto the two curve values -- confirm against those helpers
        /// </remarks>
        /// <param name="value">The number to convert.  It gets multiplied by props.Number_ScaleToLong and converted to a long first</param>
        /// <param name="props">Supplies the output width, the scale and the base</param>
        /// <returns>An array with props.Width elements</returns>
        private static double[] ConvertToVector_LeftSignificant(double value, SOMConvertToVectorProps props)
        {
            // Exponents of the two bounding curves.  For x between 0 and 1, x^.1 is the lower curve and x^.04 is the upper curve
            const double POW_LOWERCURVE = .1;
            const double POW_UPPERCURVE = .04;

            // Express the scaled value as digits of the configured base
            long asLong = Convert.ToInt64(value * props.Number_ScaleToLong);
            var numberBase = props.Number_BaseConvertTo.Value;
            int[] digits = MathND.ConvertToBase(asLong, numberBase);

            int width = props.Width;
            int digitCount = digits.Length;

            if (digitCount == 0)
            {
                // No digits means the value is zero
                return Enumerable.Repeat(0d, width).ToArray();
            }

            if (digitCount > width)
            {
                // Doesn't fit in the requested width, so saturate in the direction of the sign
                return Enumerable.Repeat(value < 0 ? -1d : 1d, width).ToArray();
            }

            // At this point 1 <= digitCount <= width, so the result is exactly width long.  A new array starts out as all zeros,
            // which takes care of the padding on the left
            double[] retVal = new double[width];
            int offset = width - digitCount;

            // Leading digit: a plain fraction of the base
            double divisor = numberBase.ToDouble();
            double leading = digits[0].ToDouble() / divisor;
            double leadingMagnitude = Math.Abs(leading);
            bool isNegative = leading < 0;

            retVal[offset] = leading;

            // Trailing digits: these climb from the leading magnitude toward one
            double headroom = 1d - leadingMagnitude;
            double xStep = 1d / (digitCount - 1);       // infinity when there is a single digit, but then the loop never runs

            for (int index = 1; index < digitCount; index++)
            {
                double x = index * xStep;

                double lower = Math.Pow(x, POW_LOWERCURVE);
                double upper = Math.Pow(x, POW_UPPERCURVE);

                // The digit picks a spot between the two curves
                double between = UtilityCore.GetScaledValue(lower, upper, 0, numberBase, Math.Abs(digits[index]));

                // Squeeze into the gap above the leading digit
                double magnitude = leadingMagnitude + (between * headroom);

                retVal[offset + index] = isNegative ? -magnitude : magnitude;
            }

            return retVal;
        }
        /// <summary>
        /// Turns a number into a fixed width vector by writing it in another base and dividing each digit by that base
        /// </summary>
        /// <remarks>
        /// Intended use: when running a SOM against database rows, each column is mapped to its own vector, and the vectors of a
        /// row are stitched together into one instance of ISOMInput.Weights.  Numeric columns (dollars, quantities, etc) can go
        /// through this method
        ///
        /// Prequery to find the range of possible values, then run the maximum expected value through GetConvertBaseProps() to
        /// decide which base to use
        ///
        /// The returned array always has props.Width elements:
        ///     - More digits than props.Width: every element is 1 (or -1 when value is negative)
        ///     - Fewer digits than props.Width: the digits are right aligned and the left side is filled with zeros
        ///
        /// NOTE(review): each element is only within -1 to 1 if MathND.ConvertToBase returns digits whose magnitude is less than
        /// the base -- confirm against that helper
        /// </remarks>
        /// <param name="value">The number to convert.  It gets multiplied by props.Number_ScaleToLong and converted to a long first</param>
        /// <param name="props">Supplies the output width, the scale and the base</param>
        /// <returns>An array with props.Width elements</returns>
        private static double[] ConvertToVector_Direct(double value, SOMConvertToVectorProps props)
        {
            // Express the scaled value as digits of the configured base
            long asLong = Convert.ToInt64(value * props.Number_ScaleToLong);
            var numberBase = props.Number_BaseConvertTo.Value;
            int[] digits = MathND.ConvertToBase(asLong, numberBase);

            int width = props.Width;
            int digitCount = digits.Length;

            if (digitCount > width)
            {
                // Doesn't fit in the requested width, so saturate in the direction of the sign
                return Enumerable.Repeat(value < 0 ? -1d : 1d, width).ToArray();
            }

            // At this point digitCount <= width, so the result is exactly width long.  A new array starts out as all zeros, which
            // takes care of the padding on the left
            double[] retVal = new double[width];
            int offset = width - digitCount;

            // Each digit becomes a fraction of the base
            double divisor = numberBase.ToDouble();

            for (int index = 0; index < digitCount; index++)
            {
                retVal[offset + index] = digits[index].ToDouble() / divisor;
            }

            return retVal;
        }
        /// <summary>
        /// Converts the raw text of a field into a vector, picking the conversion based on stats.FieldType
        /// </summary>
        /// <remarks>
        /// Integer / FloatingPoint: the text is parsed with double.Parse (null or whitespace text counts as zero) and sent through
        /// ConvertToVector_LeftSignificant.  Nothing is caught here, so a parse failure on non blank text propagates to the caller
        ///
        /// DateTime: the text is parsed with DateTime.TryParse, falling back to stats.Date_Min when that fails.  The number of days
        /// between that date and stats.Date_Min is then sent through ConvertToVector_LeftSignificant
        ///
        /// AlphaNumeric / AnyText: handed to ConvertToVector_Text along with stats.UniqueChars_NonWhitespace
        /// </remarks>
        /// <param name="text">The raw field value</param>
        /// <param name="stats">Describes the field (type, minimum date, known characters)</param>
        /// <param name="convertProps">Conversion settings that get passed along to the type specific converter</param>
        /// <returns>The vector produced by the type specific converter</returns>
        /// <exception cref="ApplicationException">stats.FieldType isn't one of the types listed above</exception>
        public static double[] ConvertToVector(string text, SOMFieldStats stats, SOMConvertToVectorProps convertProps)
        {
            var fieldType = stats.FieldType;

            // Numeric
            if (fieldType == SOMFieldType.Integer || fieldType == SOMFieldType.FloatingPoint)
            {
                double number = 0d;
                if (!string.IsNullOrWhiteSpace(text))
                {
                    number = double.Parse(text.Trim());
                }

                return ConvertToVector_LeftSignificant(number, convertProps);
            }

            // Date
            if (fieldType == SOMFieldType.DateTime)
            {
                DateTime minDate = stats.Date_Min.Value;

                DateTime parsed;
                if (!DateTime.TryParse(text, out parsed))
                {
                    // Unreadable dates are treated as the minimum date, which makes the offset below zero
                    parsed = minDate;
                }

                // convertProps was built from "(stats.Date_Max - stats.Date_Min).TotalDays", so this uses the same units
                double daysPastMin = (parsed - minDate).TotalDays;

                return ConvertToVector_LeftSignificant(daysPastMin, convertProps);
            }

            // Text
            if (fieldType == SOMFieldType.AlphaNumeric || fieldType == SOMFieldType.AnyText)
            {
                return ConvertToVector_Text(text, convertProps, stats.UniqueChars_NonWhitespace);
            }

            throw new ApplicationException("finish this: " + fieldType.ToString());
        }
        /// <summary>
        /// Pairs each column with the field stats that should be used for it and the props needed to convert its values to vectors
        /// </summary>
        /// <remarks>
        /// The field stats come from FieldStatsText when the column has ForceText set, otherwise from FieldStats
        ///
        /// The conversion props depend on the field type:
        ///     - AlphaNumeric / AnyText: built from the column's width and justification override (Center when there is no override)
        ///     - DateTime: SelfOrganizingMapsDB.GetConvertToProps with the field's min and max dates and the column's width
        ///     - FloatingPoint / Integer: SelfOrganizingMapsDB.GetConvertToProps with the field's min and max numbers and the column's width
        /// </remarks>
        /// <param name="columns">The columns to build entries for</param>
        /// <returns>One tuple per column, in the same order as <paramref name="columns"/></returns>
        /// <exception cref="ApplicationException">A field's type isn't one of the types listed above</exception>
        private static Tuple<ColumnStats, SOMFieldStats, SOMConvertToVectorProps>[] GetFinalColumnStats(ColumnStats[] columns)
        {
            var results = new Tuple<ColumnStats, SOMFieldStats, SOMConvertToVectorProps>[columns.Length];

            int index = 0;

            foreach (ColumnStats column in columns)
            {
                // Which set of stats to go with
                SOMFieldStats field = column.ForceText ? column.FieldStatsText : column.FieldStats;

                //TODO: look at overrides

                // How to convert values of this field into vectors
                var fieldType = field.FieldType;

                SOMConvertToVectorProps convertProps;

                if (fieldType == SOMFieldType.AlphaNumeric || fieldType == SOMFieldType.AnyText)
                {
                    convertProps = new SOMConvertToVectorProps(column.Width, column.Override_Text_Justify ?? TextAlignment.Center);
                }
                else if (fieldType == SOMFieldType.DateTime)
                {
                    convertProps = SelfOrganizingMapsDB.GetConvertToProps(field.Date_Min.Value, field.Date_Max.Value, column.Width);
                }
                else if (fieldType == SOMFieldType.FloatingPoint || fieldType == SOMFieldType.Integer)
                {
                    convertProps = SelfOrganizingMapsDB.GetConvertToProps(field.Numeric_Min.Value, field.Numeric_Max.Value, column.Width);
                }
                else
                {
                    throw new ApplicationException("Unknown SOMFieldType: " + fieldType.ToString());
                }

                results[index] = new Tuple<ColumnStats, SOMFieldStats, SOMConvertToVectorProps>(column, field, convertProps);
                index++;
            }

            return results;
        }