예제 #1
0
        /// <summary>
        /// Encodes each character of <paramref name="text"/> as an integer code, producing one
        /// array slot per character.
        /// </summary>
        /// <param name="text">Text to encode.  Null or empty yields a zero-length array.</param>
        /// <param name="uniqueNonWhitespace">Lookup table of characters; a character's code is its index in this array plus one.</param>
        /// <returns>
        /// An array the same length as <paramref name="text"/>.  Whitespace characters (as judged
        /// by UtilityCore.IsWhitespace) stay 0.  A character missing from the lookup table also
        /// ends up 0, because IndexOf returns -1 and one is added.
        /// </returns>
        private static int[] ConvertToVector_Text_Number(string text, char[] uniqueNonWhitespace)
        {
            // Null and empty both collapse to a zero-length result
            int length = string.IsNullOrEmpty(text) ? 0 : text.Length;

            int[] codes = new int[length];

            for (int index = 0; index < length; index++)
            {
                char symbol = text[index];

                if (UtilityCore.IsWhitespace(symbol))
                {
                    // Whitespace keeps the array's default value of zero
                    continue;
                }

                // One-based position in the lookup table, so that zero stays reserved for whitespace
                codes[index] = Array.IndexOf<char>(uniqueNonWhitespace, symbol) + 1;
            }

            return codes;
        }
예제 #2
0
        /// <summary>
        /// Builds summary statistics for a collection of raw text values: counts, trimmed length
        /// range, the set of characters used, and (depending on the field type) numeric or date
        /// min / max / average / standard deviation.
        /// </summary>
        /// <param name="field">
        /// The raw values.  The sequence is enumerated exactly once.  Null elements are treated
        /// as empty strings.
        /// </param>
        /// <param name="typeOverride">
        /// When supplied, used as the field type instead of calling GetFieldType on the values.
        /// </param>
        /// <returns>A populated SOMFieldStats.  Numeric and date stats are null when they don't apply to the field type.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="field"/> contains no elements (min/max of an empty sequence).</exception>
        /// <exception cref="FormatException">A non-blank value can't be parsed as the numeric or date type being used.</exception>
        public static SOMFieldStats GetFieldStats(IEnumerable<string> field, SOMFieldType? typeOverride = null)
        {
            // Materialize once.  The values are walked several times below, and a deferred or
            // one-shot sequence would otherwise be recomputed (or give different answers) on each pass.
            // A null field throws ArgumentNullException from ToArray.
            string[] values = field as string[] ?? field.ToArray();

            // Dedupe (null is treated the same as blank, matching the numeric and date handling below)
            string[] deduped = values.
                               Select(o => (o ?? string.Empty).Trim()).
                               Distinct().
                               ToArray();

            // Unique Chars
            char[] uniqueChars = deduped.
                                 SelectMany(o => o).
                                 Distinct().
                                 OrderBy(o => o).
                                 ToArray();

            // FieldType
            SOMFieldType type = typeOverride ?? GetFieldType(values);

            // Numeric Stats
            double? numericMin = null;
            double? numericMax = null;
            double? numericAvg = null;
            double? numericStandDev = null;

            if (type == SOMFieldType.Integer || type == SOMFieldType.FloatingPoint)
            {
                // Blank entries count as zero
                double[] numerics = values.
                                    Select(o => string.IsNullOrWhiteSpace(o) ? 0d : double.Parse(o.Trim())).
                                    ToArray();

                numericMin = numerics.Min();
                numericMax = numerics.Max();
                var avg_stdev = Math1D.Get_Average_StandardDeviation(numerics);
                numericAvg = avg_stdev.Item1;
                numericStandDev = avg_stdev.Item2;
            }

            // Date Stats
            DateTime? dateMin = null;
            DateTime? dateMax = null;
            DateTime? dateAvg = null;
            TimeSpan? dateStandDev = null;

            if (type == SOMFieldType.DateTime)
            {
                // Blank entries are skipped (unlike numerics, there is no natural zero date)
                DateTime[] dates = values.
                                   Where(o => !string.IsNullOrWhiteSpace(o)).
                                   Select(o => DateTime.Parse(o.Trim())).
                                   ToArray();

                // If every entry was blank, there is nothing to measure.  Leave the date stats
                // null instead of letting Min/Max throw on an empty array.
                if (dates.Length > 0)
                {
                    dateMin = dates.Min();
                    dateMax = dates.Max();
                    var avg_stdev = Math1D.Get_Average_StandardDeviation(dates);
                    dateAvg = avg_stdev.Item1;
                    dateStandDev = avg_stdev.Item2;
                }
            }

            // Return
            return new SOMFieldStats()
            {
                Count = values.Length,
                UniqueCount = deduped.Length,

                MinLength = deduped.Min(o => o.Length),     // deduped are already trimmed
                MaxLength = deduped.Max(o => o.Length),
                UniqueChars = uniqueChars,
                UniqueChars_NonWhitespace = uniqueChars.Where(o => !UtilityCore.IsWhitespace(o)).ToArray(),

                FieldType = type,

                Numeric_Min = numericMin,
                Numeric_Max = numericMax,
                Numeric_Avg = numericAvg,
                Numeric_StandDev = numericStandDev,

                Date_Min = dateMin,
                Date_Max = dateMax,
                Date_Avg = dateAvg,
                Date_StandDev = dateStandDev,
            };
        }