/// <summary>
/// Encodes a string as one integer per character: the 1-based position of the
/// character within <paramref name="uniqueNonWhitespace"/>, or zero for whitespace.
/// </summary>
/// <param name="text">The text to encode. Null or empty yields an empty array.</param>
/// <param name="uniqueNonWhitespace">
/// The lookup alphabet. It is only consulted for characters that
/// <c>UtilityCore.IsWhitespace</c> reports as non-whitespace.
/// </param>
/// <returns>An array with the same length as <paramref name="text"/>.</returns>
/// <remarks>
/// A character missing from the alphabet makes IndexOf return -1, which becomes 0 after
/// the +1 shift, so it is indistinguishable from whitespace. The alphabet is expected to
/// be complete, so that should not occur in practice.
/// </remarks>
private static int[] ConvertToVector_Text_Number(string text, char[] uniqueNonWhitespace)
{
    if (string.IsNullOrEmpty(text))
    {
        return new int[0];
    }

    int[] codes = new int[text.Length];
    int position = 0;

    foreach (char symbol in text)
    {
        // Whitespace maps to zero; everything else maps to its 1-based alphabet slot
        codes[position++] = UtilityCore.IsWhitespace(symbol)
            ? 0
            : Array.IndexOf<char>(uniqueNonWhitespace, symbol) + 1;
    }

    return codes;
}
/// <summary>
/// Computes summary statistics (counts, length range, character set, and numeric or
/// date aggregates when applicable) for one column of raw text values.
/// </summary>
/// <param name="field">
/// The raw values. The sequence is enumerated exactly once. Null entries are treated
/// the same as empty strings.
/// </param>
/// <param name="typeOverride">
/// When supplied, used as the field type instead of asking <c>GetFieldType</c>.
/// </param>
/// <returns>
/// A populated <c>SOMFieldStats</c>. Numeric_* values are only set for Integer and
/// FloatingPoint fields, Date_* values only for DateTime fields, and both stay null when
/// there is nothing to aggregate. MinLength and MaxLength are zero for an empty input.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
/// <remarks>
/// Parsing uses <c>double.Parse</c> and <c>DateTime.Parse</c> with their default
/// (current culture) behavior, so a value that does not parse as the chosen type
/// (for example because of a mismatched <paramref name="typeOverride"/>) throws.
/// </remarks>
public static SOMFieldStats GetFieldStats(IEnumerable<string> field, SOMFieldType? typeOverride = null)
{
    if (field == null)
    {
        throw new ArgumentNullException("field");
    }

    // Materialize once so a lazy or one-shot sequence isn't re-enumerated by every step below
    string[] items = field.ToArray();

    // Trim up front. Null is treated as blank, matching how the numeric/date parsing treats it
    string[] trimmed = items.
        Select(o => (o ?? string.Empty).Trim()).
        ToArray();

    // Dedupe
    string[] deduped = trimmed.
        Distinct().
        ToArray();

    // Unique Chars
    char[] uniqueChars = deduped.
        SelectMany(o => o).
        Distinct().
        OrderBy(o => o).
        ToArray();

    // FieldType (the untrimmed values are handed over, same as the caller supplied them)
    SOMFieldType type = typeOverride ?? GetFieldType(items);

    // Numeric Stats
    double? numericMin = null;
    double? numericMax = null;
    double? numericAvg = null;
    double? numericStandDev = null;
    if (type == SOMFieldType.Integer || type == SOMFieldType.FloatingPoint)
    {
        // Blank entries count as zero
        double[] numerics = trimmed.
            Select(o => o.Length == 0 ? 0d : double.Parse(o)).
            ToArray();

        // Min/Max throw on an empty sequence, so leave the stats null in that case
        if (numerics.Length > 0)
        {
            numericMin = numerics.Min();
            numericMax = numerics.Max();

            var avg_stdev = Math1D.Get_Average_StandardDeviation(numerics);
            numericAvg = avg_stdev.Item1;
            numericStandDev = avg_stdev.Item2;
        }
    }

    // Date Stats
    DateTime? dateMin = null;
    DateTime? dateMax = null;
    DateTime? dateAvg = null;
    TimeSpan? dateStandDev = null;
    if (type == SOMFieldType.DateTime)
    {
        // Blank entries are skipped rather than defaulted
        DateTime[] dates = trimmed.
            Where(o => o.Length > 0).
            Select(o => DateTime.Parse(o)).
            ToArray();

        // Every entry could have been blank, which would make Min/Max throw
        if (dates.Length > 0)
        {
            dateMin = dates.Min();
            dateMax = dates.Max();

            var avg_stdev = Math1D.Get_Average_StandardDeviation(dates);
            dateAvg = avg_stdev.Item1;
            dateStandDev = avg_stdev.Item2;
        }
    }

    // Return
    return new SOMFieldStats()
    {
        Count = items.Length,
        UniqueCount = deduped.Length,
        MinLength = deduped.Length == 0 ? 0 : deduped.Min(o => o.Length),       // deduped are already trimmed
        MaxLength = deduped.Length == 0 ? 0 : deduped.Max(o => o.Length),
        UniqueChars = uniqueChars,
        UniqueChars_NonWhitespace = uniqueChars.Where(o => !UtilityCore.IsWhitespace(o)).ToArray(),
        FieldType = type,
        Numeric_Min = numericMin,
        Numeric_Max = numericMax,
        Numeric_Avg = numericAvg,
        Numeric_StandDev = numericStandDev,
        Date_Min = dateMin,
        Date_Max = dateMax,
        Date_Avg = dateAvg,
        Date_StandDev = dateStandDev,
    };
}