/// <summary>
/// Generic parameter parser. Currently hand-hacked to auto-detect type.
///
/// Generic form:   Name:Values
/// for example,    lr:0.05-0.4
///                 lambda:0.1-1000@log10
///                 nl:2-64@log2
///                 norm:-,+
///
/// A value containing ',' is turned into a discrete sweep over the comma-separated items.
/// Otherwise, for numeric parameter types, the value is read as "min-max", optionally
/// followed by '@' (or ';') and a ';'-separated option list:
///   an option starting with "log" (but not "log-" or "log:-") enables the log scale,
///   "steps:N" sets the number of steps (default 100),
///   "inc:X" sets the step size.
/// Every option must be recognized and each kind may appear at most once; otherwise the
/// specification is rejected. All numbers are parsed with the invariant culture, so the
/// result does not depend on the current thread culture.
/// </summary>
/// <param name="paramValue">The "Values" part of the sweep specification.</param>
/// <param name="paramType">Declared type of the parameter; selects between long and float sweeps.</param>
/// <param name="paramName">Parameter name, copied into the generator arguments and error messages.</param>
/// <param name="sweepValues">The created value generator, or null when the method returns false.</param>
/// <param name="error">
/// A message for malformed specifications. It stays null on success and also on the
/// failure paths that do not report a message (non-numeric parameter type, fewer than
/// two range bounds, or bounds that cannot be parsed as numbers).
/// </param>
/// <returns>True when a value generator was created; false otherwise.</returns>
/// REVIEW: allow overriding auto-detection to specify type
/// and delegate to parameter type for actual parsing
/// REVIEW: specifying ordinal discrete parameters
public bool TryParseParameter(string paramValue, Type paramType, string paramName, out IValueGenerator sweepValues, out string error)
{
    sweepValues = null;
    error = null;

    // Discrete parameter: any comma makes the whole value a list of choices.
    if (paramValue.Contains(','))
    {
        var discreteArgs = new DiscreteParamArguments();
        discreteArgs.Name = paramName;
        discreteArgs.Values = paramValue.Split(',');
        sweepValues = new DiscreteValueGenerator(discreteArgs);
        return true;
    }

    // numeric parameter
    if (!CmdParser.IsNumericType(paramType))
    {
        return false;
    }

    // REVIEW: deal with negative bounds
    // Split off the option list: it starts after '@', or after the first ';' when there is no '@'.
    string scaleStr = null;
    int atIdx = paramValue.IndexOf('@');
    if (atIdx < 0)
    {
        atIdx = paramValue.IndexOf(';');
    }
    if (atIdx >= 0)
    {
        scaleStr = paramValue.Substring(atIdx + 1);
        paramValue = paramValue.Substring(0, atIdx);
        if (scaleStr.Length < 1)
        {
            error = $"Could not parse sweep range for parameter: {paramName}";
            return false;
        }
    }

    // Extract the minimum, and the maximum value of the list of suggested sweeps.
    // Lookbehind splitting at the '-' character: a '-' is a separator only when it is
    // preceded by a character other than 'e'/'E', so exponents such as "1e-5" stay intact
    // and a leading '-' (no preceding character) is kept as a sign.
    // It is used for the Float and Long param types.
    // Example format: "0.02-0.1;steps:5".
    string[] minMaxRegex = Regex.Split(paramValue, "(?<=[^eE])-");
    if (minMaxRegex.Length != 2)
    {
        if (minMaxRegex.Length > 2)
        {
            error = $"Could not parse sweep range for parameter: {paramName}";
        }
        return false;
    }
    string minStr = minMaxRegex[0];
    string maxStr = minMaxRegex[1];

    int numSteps = 100;
    Double stepSize = -1; // non-positive means "not specified"; passed on as a null StepSize
    bool logBase = false;
    if (scaleStr != null)
    {
        try
        {
            string[] options = scaleStr.Split(';');
            // One flag per option kind: [0] log, [1] steps, [2] inc.
            bool[] optionsSpecified = new bool[3];
            foreach (string option in options)
            {
                if (option.StartsWith("log", StringComparison.Ordinal)
                    && !option.StartsWith("log-", StringComparison.Ordinal)
                    && !option.StartsWith("log:-", StringComparison.Ordinal))
                {
                    logBase = true;
                    optionsSpecified[0] = true;
                }
                if (option.StartsWith("steps", StringComparison.Ordinal))
                {
                    numSteps = int.Parse(option.Substring(option.IndexOf(':') + 1), CultureInfo.InvariantCulture);
                    optionsSpecified[1] = true;
                }
                if (option.StartsWith("inc", StringComparison.Ordinal))
                {
                    stepSize = Double.Parse(option.Substring(option.IndexOf(':') + 1), CultureInfo.InvariantCulture);
                    optionsSpecified[2] = true;
                }
            }
            // Unrecognized or duplicated options leave fewer flags set than there are options.
            if (options.Length != optionsSpecified.Count(b => b))
            {
                error = $"Could not parse sweep range for parameter: {paramName}";
                return false;
            }
        }
        catch (Exception e)
        {
            error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
            return false;
        }
    }

    if (paramType == typeof(UInt16)
        || paramType == typeof(UInt32)
        || paramType == typeof(UInt64)
        || paramType == typeof(short)
        || paramType == typeof(int)
        || paramType == typeof(long))
    {
        long min;
        long max;
        // Same number style as long.TryParse(string, out long), but culture-invariant.
        if (!long.TryParse(minStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out min)
            || !long.TryParse(maxStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out max))
        {
            return false;
        }
        var longArgs = new Microsoft.ML.Sweeper.LongParamArguments();
        longArgs.Name = paramName;
        longArgs.Min = min;
        longArgs.Max = max;
        longArgs.NumSteps = numSteps;
        longArgs.StepSize = (stepSize > 0 ? stepSize : new Nullable<Double>());
        longArgs.LogBase = logBase;

        try
        {
            sweepValues = new LongValueGenerator(longArgs);
        }
        catch (Exception e)
        {
            error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
            return false;
        }
    }
    else
    {
        Float minF;
        Float maxF;
        // Same number style as the two-argument TryParse overload, but culture-invariant,
        // so "0.05" is read the same way regardless of the machine's regional settings.
        const NumberStyles floatStyle = NumberStyles.Float | NumberStyles.AllowThousands;
        if (!Float.TryParse(minStr, floatStyle, CultureInfo.InvariantCulture, out minF)
            || !Float.TryParse(maxStr, floatStyle, CultureInfo.InvariantCulture, out maxF))
        {
            return false;
        }
        var floatArgs = new FloatParamArguments();
        floatArgs.Name = paramName;
        floatArgs.Min = minF;
        floatArgs.Max = maxF;
        floatArgs.NumSteps = numSteps;
        floatArgs.StepSize = (stepSize > 0 ? stepSize : new Nullable<Double>());
        floatArgs.LogBase = logBase;

        try
        {
            sweepValues = new FloatValueGenerator(floatArgs);
        }
        catch (Exception e)
        {
            error = $"Error creating sweep generator for parameter '{paramName}': {e.Message}";
            return false;
        }
    }
    return true;
}
/// <summary>
/// Accumulates statistics for a single column over the rows listed in its indices and
/// records them both per column and per column purpose.
/// For a numeric column the parsed values are accumulated (values that fail to parse are
/// skipped); for any other column the text length and the number of space characters of
/// each value are accumulated.
/// </summary>
/// <param name="data">Text cells, indexed first by the entries of <c>column.Indices</c>.</param>
/// <param name="column">The column whose kind, purpose and indices drive the statistics.</param>
private void ApplyCore(ReadOnlyMemory<char>[][] data, Column column)
{
    bool isNumeric = CmdParser.IsNumericType(column.Kind?.ToType());

    // Values of a numeric column, or text lengths of a non-numeric one.
    var valueStats = new Stats();
    // Space counts; only fed for a non-numeric column.
    var spaceStats = new Stats();

    foreach (int index in column.Indices)
    {
        // Stop at the first index that falls outside the available data.
        if (index >= data.Length)
        {
            break;
        }

        ReadOnlyMemory<char>[] cells = data[index];
        for (int i = 0; i < cells.Length; i++)
        {
            // Fetch (or lazily create) the accumulators shared by all columns with this purpose.
            string purpose = column.Purpose;
            Stats purposeStats;
            Stats purposeSpaceStats;
            if (StatsPerColumnPurpose.ContainsKey(purpose))
            {
                purposeStats = StatsPerColumnPurpose[purpose];
                purposeSpaceStats = StatsPerColumnPurposeWithSpaces[purpose];
            }
            else
            {
                purposeStats = new Stats();
                purposeSpaceStats = new Stats();
                StatsPerColumnPurpose.Add(purpose, purposeStats);
                StatsPerColumnPurposeWithSpaces.Add(purpose, purposeSpaceStats);
            }

            string text = cells[i].ToString();
            if (!isNumeric)
            {
                int length = text.Length;
                valueStats.Add(length);
                purposeStats.Add(length);

                int spaces = text.Count(c => c == ' ');
                spaceStats.Add(spaces);
                purposeSpaceStats.Add(spaces);
                continue;
            }

            if (Double.TryParse(text, out double parsed))
            {
                valueStats.Add(parsed);
                purposeStats.Add(parsed);
            }
        }
    }

    if (!isNumeric)
    {
        NonNumericColumnLengthFeature.Add(new ColumnStatistics { Column = column, Stats = valueStats });
        NonNumericColumnSpacesFeature.Add(new ColumnStatistics { Column = column, Stats = spaceStats });
        return;
    }

    NumericColumnFeatures.Add(new ColumnStatistics { Column = column, Stats = valueStats });
}