예제 #1
0
        /// <summary>
        ///   Tries the standard date/time formats that match the length of the first sample against
        ///   all samples and returns as soon as one of them yields a value format.
        /// </summary>
        /// <param name="samples">The sample texts; must not be <c>null</c> or empty.</param>
        /// <param name="checkNamedDates">Passed on to the lookup of candidate formats.</param>
        /// <param name="cancellationToken">Checked before every candidate format is tried.</param>
        /// <returns>
        ///   <c>null</c> if cancellation was requested; the first <see cref="CheckResult" /> that has
        ///   a <c>FoundValueFormat</c>; otherwise the result that accumulated the best possible match
        ///   of all attempts.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        ///   <paramref name="samples" /> is <c>null</c> or empty.
        /// </exception>
        public static CheckResult GuessDateTime(IList<string> samples, bool checkNamedDates,
                                                CancellationToken cancellationToken)
        {
            if (samples == null || samples.IsEmpty())
            {
                throw new ArgumentNullException(nameof(samples));
            }

            var bestMatch    = new CheckResult();
            var sampleLength = samples[0].Length;

            // Only formats that fit the length of the first sample are worth trying
            var candidateFormats = StringConversion.StandardDateTimeFormats.MatchingforLength(sampleLength, checkNamedDates);

            foreach (var format in candidateFormats)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return null;
                }

                // No '/' after the first position: a single attempt with the separator of the
                // current culture is made
                if (format.IndexOf('/') <= 0)
                {
                    var culture = CultureInfo.CurrentCulture;
                    var attempt = StringConversion.CheckDate(samples, format,
                                                             culture.DateTimeFormat.DateSeparator, ":", culture);
                    if (attempt.FoundValueFormat != null)
                    {
                        return attempt;
                    }

                    bestMatch.KeepBestPossibleMatch(attempt);
                    continue;
                }

                // The format contains a '/': try it with every known date separator
                foreach (var separator in StringConversion.DateSeparators)
                {
                    var attempt = StringConversion.CheckDate(samples, format, separator, ":", CultureInfo.CurrentCulture);
                    if (attempt.FoundValueFormat != null)
                    {
                        return attempt;
                    }

                    bestMatch.KeepBestPossibleMatch(attempt);
                }
            }

            return bestMatch;
        }
예제 #2
0
        /// <summary>
        ///   Guesses the value format of a list of sample texts by trying, in this order: boolean,
        ///   GUID, a plain-text heuristic, dates in a previously found format, <c>yyyyMMdd</c>
        ///   dates, serial dates, numbers and finally the standard date/time formats.
        /// </summary>
        /// <param name="samples">The samples.</param>
        /// <param name="minRequiredSamples">The minimum required samples.</param>
        /// <param name="trueValue">The text to be regarded as <c>true</c></param>
        /// <param name="falseValue">The text to be regarded as <c>false</c></param>
        /// <param name="guessBoolean">Try to identify a boolean</param>
        /// <param name="guessGuid">Try to determine if its a GUID</param>
        /// <param name="guessNumeric">Try to determine if its a Number</param>
        /// <param name="guessDateTime">Try to determine if it is a date time</param>
        /// <param name="guessPercentage">Accept percentage values</param>
        /// <param name="serialDateTime">Allow serial Date time</param>
        /// <param name="checkNamedDates">if set to <c>true</c> [check named dates].</param>
        /// <param name="othersValueFormatDate">
        ///   A date format whose date format, date separator and time separator are tried on the
        ///   samples; may be <c>null</c> to skip this check.
        /// </param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>
        ///   <c>null</c> if there are no samples, if cancellation was requested, or if there are
        ///   fewer samples than <paramref name="minRequiredSamples" /> and none of the earlier checks
        ///   matched; otherwise a <see cref="CheckResult" />.
        /// </returns>
        public static CheckResult GuessValueFormat(IList<string> samples, int minRequiredSamples,
                                                   string trueValue, string falseValue, bool guessBoolean, bool guessGuid, bool guessNumeric, bool guessDateTime,
                                                   bool guessPercentage, bool serialDateTime, bool checkNamedDates, ValueFormat othersValueFormatDate, CancellationToken cancellationToken)
        {
            Contract.Requires(samples != null);

            if (samples.IsEmpty())
            {
                return null;
            }

            var count       = samples.Count;
            var checkResult = new CheckResult {
                FoundValueFormat = new ValueFormat()
            };

            // if there is only one sample value and it is the false value, assume it is a boolean
            if (guessBoolean && count == 1 && !string.IsNullOrEmpty(falseValue) &&
                samples[0].Equals(falseValue, StringComparison.OrdinalIgnoreCase))
            {
                checkResult.FoundValueFormat.DataType = DataType.Boolean;
                return checkResult;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // this could be a boolean
            if (guessBoolean && count <= 2)
            {
                var    allParsed      = true;
                string usedTrueValue  = null;
                string usedFalseValue = null;
                foreach (var value in samples)
                {
                    var result = StringConversion.StringToBooleanStrict(value, trueValue, falseValue);
                    if (result == null)
                    {
                        allParsed = false;
                        break;
                    }

                    // Item1 is the parsed boolean, Item2 the text that was stored for it
                    if (result.Item1)
                    {
                        usedTrueValue = result.Item2;
                    }
                    else
                    {
                        usedFalseValue = result.Item2;
                    }
                }

                if (allParsed)
                {
                    checkResult.FoundValueFormat.DataType = DataType.Boolean;
                    if (!string.IsNullOrEmpty(usedTrueValue))
                    {
                        checkResult.FoundValueFormat.True = usedTrueValue;
                    }
                    if (!string.IsNullOrEmpty(usedFalseValue))
                    {
                        checkResult.FoundValueFormat.False = usedFalseValue;
                    }
                    return checkResult;
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            if (guessGuid && StringConversion.CheckGuid(samples))
            {
                checkResult.FoundValueFormat.DataType = DataType.Guid;
                return checkResult;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // in case we have named dates, this is not feasible
            if (!checkNamedDates)
            {
                // Trying some chars, if they are in, assume its a string.
                // Not having AM PM or T as it might be part of a date. Not having E in there as it
                // might be part of a number. u 1.487% o 6.264% n 2.365% i 6.286% h 7.232% s 6.327%
                // This adds to a 30% chance for each position in the text to determine if a text is
                // a regular text.
                var textIndicators  = new[] { 'u', 'U', 'o', 'O', 'i', 'I', 'n', 'N', 's', 'S', 'h', 'H' };
                var valuesWithChars = 0;
                foreach (var value in samples)
                {
                    if (value.IndexOfAny(textIndicators) <= -1)
                    {
                        continue;
                    }
                    valuesWithChars++;
                    // Only decide for string once count / 2 (integer division) of the samples, or
                    // 10 samples, contained one of the characters
                    if (valuesWithChars < count / 2 && valuesWithChars < 10)
                    {
                        continue;
                    }
                    checkResult.FoundValueFormat.DataType = DataType.String;
                    return checkResult;
                }
            }

            // With too few samples the only remaining attempt is the date format passed in
            if (count < minRequiredSamples && guessDateTime && othersValueFormatDate != null)
            {
                var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
            }

            // if we have less than the required samples values do not try to get a type
            if (count < minRequiredSamples || cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            var firstValue = samples[0];

            // Guess a date format that could be interpreted as number before testing numbers
            if (guessDateTime && firstValue.Length == 8)
            {
                var res = StringConversion.CheckDate(samples, "yyyyMMdd", string.Empty, ":", CultureInfo.InvariantCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // We need more than 10 sample values here, otherwise it is too dangerous to assume it
            // is a date
            if (guessDateTime && serialDateTime && count > 10 && count > minRequiredSamples)
            {
                var res = StringConversion.CheckSerialDate(samples, true);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // assume dates are of the same format across the files we check if the dates
            // we have would possibly match no matter how many samples we have
            if (guessDateTime && othersValueFormatDate != null)
            {
                var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            if (guessNumeric)
            {
                var res = GuessNumeric(samples, guessPercentage, false, cancellationToken);
                // GuessNumeric returns null when it was cancelled; do not dereference it
                if (res == null)
                {
                    return null;
                }
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // Minimum length of a date is 4 characters
            if (guessDateTime && firstValue.Length > 3)
            {
                var res = GuessDateTime(samples, checkNamedDates, cancellationToken);
                // GuessDateTime returns null when it was cancelled; do not dereference it
                if (res == null)
                {
                    return null;
                }
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // if we have dates and allow serial dates, but do not guess numeric (this would be a
            // fit) try if the dates are all serial
            if (guessDateTime && serialDateTime && !guessNumeric)
            {
                var res = StringConversion.CheckSerialDate(samples, false);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            return checkResult;
        }
예제 #3
0
        /// <summary>
        ///   Tries to read the samples as numbers using every combination of grouping and decimal
        ///   separator that actually occurs in the samples.
        /// </summary>
        /// <param name="samples">The sample texts.</param>
        /// <param name="guessPercentage">Passed on to the number check.</param>
        /// <param name="allowStartingZero">Passed on to the number check.</param>
        /// <param name="cancellationToken">Checked before every separator combination is tried.</param>
        /// <returns>
        ///   <c>null</c> if cancellation was requested; the first <see cref="CheckResult" /> that has
        ///   a <c>FoundValueFormat</c>; otherwise the result that accumulated the best possible match
        ///   of all attempts.
        /// </returns>
        public static CheckResult GuessNumeric(IList<string> samples, bool guessPercentage,
                                               bool allowStartingZero, CancellationToken cancellationToken)
        {
            var bestMatch = new CheckResult();

            // Grouping characters seen in the samples, followed by '\0' which is always tried last
            var groupingCandidates = CollectCharactersFoundInSamples(StringConversion.DecimalGroupings, samples);
            groupingCandidates.Add('\0');

            // Need to have at least one decimal separator
            var decimalCandidates = CollectCharactersFoundInSamples(StringConversion.DecimalSeparators, samples);
            if (decimalCandidates.Count == 0)
            {
                decimalCandidates.Add('.');
            }

            // Try Numbers: Int and Decimal
            foreach (var grouping in groupingCandidates)
            {
                foreach (var decimalPoint in decimalCandidates)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return null;
                    }

                    // The same character can not be grouping and decimal separator at once
                    if (decimalPoint == grouping)
                    {
                        continue;
                    }

                    var attempt = StringConversion.CheckNumber(samples, decimalPoint, grouping, guessPercentage,
                                                               allowStartingZero);
                    if (attempt.FoundValueFormat != null)
                    {
                        return attempt;
                    }

                    bestMatch.KeepBestPossibleMatch(attempt);
                }
            }

            return bestMatch;
        }

        /// <summary>
        ///   Returns, in their original order, the candidate characters other than '\0' that occur
        ///   in at least one of the samples.
        /// </summary>
        private static List<char> CollectCharactersFoundInSamples(IEnumerable<char> candidates, IList<string> samples)
        {
            var found = new List<char>();
            foreach (var candidate in candidates)
            {
                if (candidate == '\0')
                {
                    continue;
                }

                foreach (var sample in samples)
                {
                    if (sample.IndexOf(candidate) > -1)
                    {
                        found.Add(candidate);
                        break;
                    }
                }
            }

            return found;
        }