示例#1
0
        /// <summary>
        /// Generic parameter parser. Currently hand-hacked to auto-detect type.
        ///
        /// Generic form:   Name:Values
        /// for example,    lr:0.05-0.4
        ///          lambda:0.1-1000@log10
        ///          nl:2-64@log2
        ///          norm:-,+
        ///
        /// Parsing rules, in the order they are applied:
        ///   1. A value containing ',' becomes a discrete sweep over the comma-separated
        ///      items, regardless of <paramref name="paramType"/>.
        ///   2. Otherwise the parameter type must be numeric according to
        ///      CmdParser.IsNumericType.
        ///   3. The value is read as "min-max", optionally followed by '@' (or ';') and a
        ///      ';'-separated option list. Recognized options: one starting with "log"
        ///      (log scale; any trailing text such as a base is not interpreted here),
        ///      "steps:N" (number of steps, default 100) and "inc:X" (step size, only
        ///      used when positive).
        ///
        /// All numbers are parsed with the invariant culture, so the accepted syntax does
        /// not depend on the machine locale.
        /// </summary>
        /// <param name="paramValue">The text after "Name:". Must not be null.</param>
        /// <param name="paramType">Declared type of the parameter; selects between the long and float generators.</param>
        /// <param name="paramName">Parameter name, copied into the generator arguments and into error messages.</param>
        /// <param name="sweepValues">The created generator on success; null otherwise.</param>
        /// <param name="error">
        /// A message when the value looked like a sweep but was malformed or the generator
        /// could not be created. It stays null on success, and also on those failures where
        /// the value simply is not a range (non-numeric type, no "min-max" split, or bounds
        /// that do not parse as numbers).
        /// </param>
        /// <returns>True when a generator was created; false otherwise.</returns>
        /// REVIEW: allow overriding auto-detection to specify type
        /// and delegate to parameter type for actual parsing
        /// REVIEW: specifying ordinal discrete parameters
        public bool TryParseParameter(string paramValue, Type paramType, string paramName, out IValueGenerator sweepValues, out string error)
        {
            sweepValues = null;
            error       = null;

            // Any comma makes this a discrete sweep, whatever the declared type is.
            if (paramValue.Contains(','))
            {
                var discreteArgs = new DiscreteParamArguments();
                discreteArgs.Name   = paramName;
                discreteArgs.Values = paramValue.Split(',');
                sweepValues         = new DiscreteValueGenerator(discreteArgs);
                return true;
            }

            // Range sweeps are only supported for numeric parameters.
            if (!CmdParser.IsNumericType(paramType))
            {
                return false;
            }

            string rangeError = $"Could not parse sweep range for parameter: {paramName}";

            // Separate the "min-max" part from the option list. '@' takes precedence;
            // ';' is accepted as an alternative separator.
            // REVIEW:  deal with negative bounds
            string scaleStr   = null;
            int    optionsIdx = paramValue.IndexOf('@');
            if (optionsIdx < 0)
            {
                optionsIdx = paramValue.IndexOf(';');
            }
            if (optionsIdx >= 0)
            {
                scaleStr   = paramValue.Substring(optionsIdx + 1);
                paramValue = paramValue.Substring(0, optionsIdx);
                if (scaleStr.Length < 1)
                {
                    // A separator with nothing after it.
                    error = rangeError;
                    return false;
                }
            }

            // Extract the minimum and the maximum value of the list of suggested sweeps.
            // Positive lookbehind: split at every '-' that is preceded by a character other
            // than 'e'/'E'. Exponents such as "1e-5" therefore stay intact, and a leading
            // minus on the first bound is kept because nothing precedes it. A negative upper
            // bound (e.g. "1--5") yields three parts and is rejected below.
            // It is used for the Float and Long param types.
            // Example format: "0.02-0.1;steps:5".
            string[] bounds = Regex.Split(paramValue, "(?<=[^eE])-");
            if (bounds.Length != 2)
            {
                // Too many parts is a malformed range; too few means this is not a range
                // at all, which is reported without an error message.
                if (bounds.Length > 2)
                {
                    error = rangeError;
                }

                return false;
            }
            string minStr = bounds[0];
            string maxStr = bounds[1];

            int    numSteps = 100;
            Double stepSize = -1;
            bool   logBase  = false;

            if (scaleStr != null)
            {
                try
                {
                    string[] options  = scaleStr.Split(';');
                    bool     sawLog   = false;
                    bool     sawSteps = false;
                    bool     sawInc   = false;

                    foreach (string option in options)
                    {
                        // Keywords are matched ordinally so the result is locale independent.
                        if (option.StartsWith("log", StringComparison.Ordinal) &&
                            !option.StartsWith("log-", StringComparison.Ordinal) &&
                            !option.StartsWith("log:-", StringComparison.Ordinal))
                        {
                            logBase = true;
                            sawLog  = true;
                        }
                        if (option.StartsWith("steps", StringComparison.Ordinal))
                        {
                            numSteps = int.Parse(option.Substring(option.IndexOf(':') + 1), NumberStyles.Integer, CultureInfo.InvariantCulture);
                            sawSteps = true;
                        }
                        if (option.StartsWith("inc", StringComparison.Ordinal))
                        {
                            stepSize = Double.Parse(option.Substring(option.IndexOf(':') + 1), CultureInfo.InvariantCulture);
                            sawInc   = true;
                        }
                    }

                    // Every option must be recognized and each kind may appear only once;
                    // otherwise the number of distinct kinds seen differs from the option count.
                    int distinctKinds = (sawLog ? 1 : 0) + (sawSteps ? 1 : 0) + (sawInc ? 1 : 0);
                    if (options.Length != distinctKinds)
                    {
                        error = rangeError;
                        return false;
                    }
                }
                catch (Exception e)
                {
                    // Number parsing failures for "steps"/"inc" end up here.
                    error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
                    return false;
                }
            }

            // A non-positive step size means "not specified".
            Double? explicitStep = stepSize > 0 ? stepSize : (Double?)null;

            bool isIntegral =
                paramType == typeof(UInt16) ||
                paramType == typeof(UInt32) ||
                paramType == typeof(UInt64) ||
                paramType == typeof(short) ||
                paramType == typeof(int) ||
                paramType == typeof(long);

            if (isIntegral)
            {
                long min;
                long max;
                if (!long.TryParse(minStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out min) ||
                    !long.TryParse(maxStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out max))
                {
                    return false;
                }

                var longArgs = new Microsoft.ML.Sweeper.LongParamArguments();
                longArgs.Name     = paramName;
                longArgs.Min      = min;
                longArgs.Max      = max;
                longArgs.NumSteps = numSteps;
                longArgs.StepSize = explicitStep;
                longArgs.LogBase  = logBase;

                try
                {
                    sweepValues = new LongValueGenerator(longArgs);
                }
                catch (Exception e)
                {
                    error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
                    return false;
                }
            }
            else
            {
                // Same number styles as the default Float.TryParse overload, but with a fixed culture.
                const NumberStyles floatStyles = NumberStyles.Float | NumberStyles.AllowThousands;

                Float minF;
                Float maxF;
                if (!Float.TryParse(minStr, floatStyles, CultureInfo.InvariantCulture, out minF) ||
                    !Float.TryParse(maxStr, floatStyles, CultureInfo.InvariantCulture, out maxF))
                {
                    return false;
                }

                var floatArgs = new FloatParamArguments();
                floatArgs.Name     = paramName;
                floatArgs.Min      = minF;
                floatArgs.Max      = maxF;
                floatArgs.NumSteps = numSteps;
                floatArgs.StepSize = explicitStep;
                floatArgs.LogBase  = logBase;

                try
                {
                    sweepValues = new FloatValueGenerator(floatArgs);
                }
                catch (Exception e)
                {
                    error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
                    return false;
                }
            }

            return true;
        }
            /// <summary>
            /// Accumulates statistics for one column over the supplied data and records them
            /// in the feature collections of this instance.
            ///
            /// For a numeric column (per CmdParser.IsNumericType on the column kind) every value
            /// that parses as a double is added to the statistics; values that do not parse are
            /// skipped. For a non-numeric column the text length and the number of space
            /// characters of every value are added instead. The same values are also added to
            /// the per-purpose statistics keyed by <c>column.Purpose</c>.
            /// </summary>
            /// <param name="data">Jagged array indexed first by the entries of <c>column.Indices</c>; each entry holds the values to inspect.</param>
            /// <param name="column">Column description supplying the kind, purpose and indices.</param>
            private void ApplyCore(ReadOnlyMemory <char>[][] data, Column column)
            {
                bool isNumeric = CmdParser.IsNumericType(column.Kind?.ToType());

                // Numeric values for a numeric column, text lengths otherwise.
                var valueStats = new Stats();
                // Space counts; only populated for non-numeric columns.
                var spaceStats = new Stats();

                string purpose = column.Purpose;

                foreach (int index in column.Indices)
                {
                    // Stops at the first index beyond the data; later indices are not visited.
                    if (index >= data.Length)
                    {
                        break;
                    }

                    foreach (ReadOnlyMemory <char> value in data[index])
                    {
                        // Per-purpose accumulators are created on first use, i.e. only once
                        // at least one value is actually seen for that purpose.
                        Stats purposeStats;
                        Stats purposeSpaceStats;
                        if (StatsPerColumnPurpose.TryGetValue(purpose, out purposeStats))
                        {
                            purposeSpaceStats = StatsPerColumnPurposeWithSpaces[purpose];
                        }
                        else
                        {
                            purposeStats      = new Stats();
                            purposeSpaceStats = new Stats();
                            StatsPerColumnPurpose.Add(purpose, purposeStats);
                            StatsPerColumnPurposeWithSpaces.Add(purpose, purposeSpaceStats);
                        }

                        string text = value.ToString();
                        if (isNumeric)
                        {
                            // Invariant culture keeps the statistics independent of the machine
                            // locale; the number styles match the default Double.TryParse overload.
                            double parsed;
                            if (Double.TryParse(
                                    text,
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out parsed))
                            {
                                valueStats.Add(parsed);
                                purposeStats.Add(parsed);
                            }
                        }
                        else
                        {
                            valueStats.Add(text.Length);
                            purposeStats.Add(text.Length);

                            int spaces = text.Count(c => c == ' ');
                            spaceStats.Add(spaces);
                            purposeSpaceStats.Add(spaces);
                        }
                    }
                }

                if (isNumeric)
                {
                    NumericColumnFeatures.Add(new ColumnStatistics {
                        Column = column, Stats = valueStats
                    });
                }
                else
                {
                    NonNumericColumnLengthFeature.Add(new ColumnStatistics {
                        Column = column, Stats = valueStats
                    });
                    NonNumericColumnSpacesFeature.Add(new ColumnStatistics {
                        Column = column, Stats = spaceStats
                    });
                }
            }