Esempio n. 1
0
        /// <summary>
        ///   Checks that every format key in <c>StringConversion.StandardDateTimeFormats</c> is
        ///   recognized by <c>StringConversion.CheckDate</c> when the samples were rendered with
        ///   the en-US culture and "/" is passed as date separator.
        /// </summary>
        public void IdentifyDatesFormatsUS()
        {
            var dateSep = "/";
            var culture = new CultureInfo("en-US");

            // Try the date formats
            foreach (var fmt in StringConversion.StandardDateTimeFormats.Keys)
            {
                // Fill samples: one month, five days, two hours and two minutes give up to 20
                // distinct texts per format (the HashSet collapses formats that render identically)
                var samples = new HashSet<string>();
                for (var month = 9; month < 10; month++)
                {
                    for (var day = 10; day < 15; day++)
                    {
                        for (var hrs = 11; hrs < 13; hrs++)
                        {
                            for (var min = 24; min < 26; min++)
                            {
                                samples.Add(new DateTime(2010, month, day, hrs, min, 10, 876, DateTimeKind.Local).ToString(fmt, culture));
                            }
                        }
                    }
                }

                // Check with the same culture that produced the samples. Using the current culture
                // of the machine would make the outcome depend on the regional settings of whoever
                // runs the test.
                Assert.IsNotNull(
                    StringConversion.CheckDate(samples, fmt, dateSep, ":", culture).FoundValueFormat,
                    $"Test format {fmt}\nFirst not matching: {samples.First()}");
            }
        }
Esempio n. 2
0
        /// <summary>
        ///   Tries the standard date/time formats matching the length of the first sample and
        ///   returns the first check result that identified a value format.
        /// </summary>
        /// <param name="samples">The sample texts; must not be <c>null</c> or empty.</param>
        /// <param name="checkNamedDates">Passed on to the lookup of the candidate formats.</param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>
        ///   The first result with a found value format; if no format matched, the result that
        ///   collected the best possible matches; <c>null</c> if cancellation was requested.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        ///   <paramref name="samples" /> is <c>null</c> or empty.
        /// </exception>
        public static CheckResult GuessDateTime(IList<string> samples, bool checkNamedDates,
                                                CancellationToken cancellationToken)
        {
            if (samples == null || samples.IsEmpty())
            {
                throw new ArgumentNullException(nameof(samples));
            }

            var bestMatch    = new CheckResult();
            var sampleLength = samples[0].Length;
            var candidates   = StringConversion.StandardDateTimeFormats.MatchingforLength(sampleLength, checkNamedDates);

            foreach (var format in candidates)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return null;
                }

                // Formats without a '/' after the first position are checked once, using the date
                // separator of the current culture
                if (format.IndexOf('/') <= 0)
                {
                    var single = StringConversion.CheckDate(samples, format,
                                                            CultureInfo.CurrentCulture.DateTimeFormat.DateSeparator, ":", CultureInfo.CurrentCulture);
                    if (single.FoundValueFormat != null)
                    {
                        return single;
                    }

                    bestMatch.KeepBestPossibleMatch(single);
                    continue;
                }

                // The format has a date separator placeholder: try each known separator in turn
                foreach (var separator in StringConversion.DateSeparators)
                {
                    var attempt = StringConversion.CheckDate(samples, format, separator, ":", CultureInfo.CurrentCulture);
                    if (attempt.FoundValueFormat != null)
                    {
                        return attempt;
                    }

                    bestMatch.KeepBestPossibleMatch(attempt);
                }
            }

            return bestMatch;
        }
Esempio n. 3
0
        /// <summary>
        ///   Guesses the value format of the samples by trying, in this order: boolean, GUID,
        ///   plain text, dates and numbers.
        /// </summary>
        /// <param name="samples">The samples.</param>
        /// <param name="minRequiredSamples">The minimum required samples.</param>
        /// <param name="trueValue">The text to be regarded as <c>true</c></param>
        /// <param name="falseValue">The text to be regarded as <c>false</c></param>
        /// <param name="guessBoolean">Try to identify a boolean</param>
        /// <param name="guessGuid">Try to determine if its a GUID</param>
        /// <param name="guessNumeric">Try to determine if its a Number</param>
        /// <param name="guessDateTime">Try to determine if it is a date time</param>
        /// <param name="guessPercentage">Accept percentage values</param>
        /// <param name="serialDateTime">Allow serial Date time</param>
        /// <param name="checkNamedDates">if set to <c>true</c> [check named dates].</param>
        /// <param name="othersValueFormatDate">
        ///   A date format that is tried on the samples before guessing, even if there are fewer
        ///   than <paramref name="minRequiredSamples" /> samples; may be <c>null</c>.
        /// </param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>
        ///   <c>Null</c> if there are no samples, fewer samples than required (and the other date
        ///   format did not match) or cancellation was requested; otherwise a
        ///   <see cref="CheckResult" /> with the identified format or, if nothing matched, the
        ///   result that collected the best possible matches.
        /// </returns>
        public static CheckResult GuessValueFormat(IList<string> samples, int minRequiredSamples,
                                                   string trueValue, string falseValue, bool guessBoolean, bool guessGuid, bool guessNumeric, bool guessDateTime,
                                                   bool guessPercentage, bool serialDateTime, bool checkNamedDates, ValueFormat othersValueFormatDate, CancellationToken cancellationToken)
        {
            Contract.Requires(samples != null);

            if (samples.IsEmpty())
            {
                return null;
            }

            var count       = samples.Count;
            var checkResult = new CheckResult {
                FoundValueFormat = new ValueFormat()
            };

            // if it only one sample value and its false, assume its a boolean
            if (guessBoolean && count == 1 && !string.IsNullOrEmpty(falseValue)
                && samples[0].Equals(falseValue, StringComparison.OrdinalIgnoreCase))
            {
                checkResult.FoundValueFormat.DataType = DataType.Boolean;
                return checkResult;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // this could be a boolean
            if (guessBoolean && count <= 2)
            {
                var    allParsed      = true;
                string usedTrueValue  = null;
                string usedFalseValue = null;
                foreach (var value in samples)
                {
                    var result = StringConversion.StringToBooleanStrict(value, trueValue, falseValue);
                    if (result == null)
                    {
                        allParsed = false;
                        break;
                    }

                    // Remember the text that was recognized so it can be stored in the format
                    if (result.Item1)
                    {
                        usedTrueValue = result.Item2;
                    }
                    else
                    {
                        usedFalseValue = result.Item2;
                    }
                }

                if (allParsed)
                {
                    checkResult.FoundValueFormat.DataType = DataType.Boolean;
                    if (!string.IsNullOrEmpty(usedTrueValue))
                    {
                        checkResult.FoundValueFormat.True = usedTrueValue;
                    }
                    if (!string.IsNullOrEmpty(usedFalseValue))
                    {
                        checkResult.FoundValueFormat.False = usedFalseValue;
                    }
                    return checkResult;
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            if (guessGuid && StringConversion.CheckGuid(samples))
            {
                checkResult.FoundValueFormat.DataType = DataType.Guid;
                return checkResult;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // in case we have named dates, this is not feasible
            if (!checkNamedDates)
            {
                // Trying some chars, if they are in, assume its a string.
                // Not having AM PM or T as it might be part of a date. Not having E in there as it
                // might be part of a number. u 1.487% o 6.264% n 2.365% i 6.286% h 7.232% s 6.327%
                // This adds to a 30% chance for each position in the text to determine if a text is
                // a regular text.
                var textIndicators  = new[] { 'u', 'U', 'o', 'O', 'i', 'I', 'n', 'N', 's', 'S', 'h', 'H' };
                var valuesWithChars = 0;
                foreach (var value in samples)
                {
                    if (value.IndexOfAny(textIndicators) < 0)
                    {
                        continue;
                    }
                    valuesWithChars++;
                    // Decide for text once count / 2 (integer division) or 10 samples contain one
                    // of the letters
                    if (valuesWithChars < count / 2 && valuesWithChars < 10)
                    {
                        continue;
                    }
                    checkResult.FoundValueFormat.DataType = DataType.String;
                    return checkResult;
                }
            }

            // Too few samples to guess, but a date format found elsewhere can still be verified
            if (count < minRequiredSamples && guessDateTime && othersValueFormatDate != null)
            {
                var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
            }

            // if we have less than the required samples values do not try and try to get a type
            if (count < minRequiredSamples || cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            var firstValue = samples[0];

            // Guess a date format that could be interpreted as number before testing numbers
            if (guessDateTime && firstValue.Length == 8)
            {
                var res = StringConversion.CheckDate(samples, "yyyyMMdd", string.Empty, ":", CultureInfo.InvariantCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // We need to have at least 10 sample values here its too dangerous to assume it is a date
            if (guessDateTime && serialDateTime && count > 10 && count > minRequiredSamples)
            {
                var res = StringConversion.CheckSerialDate(samples, true);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // assume dates are of the same format across the files we check if the dates
            // we have would possibly match no matter how many samples we have
            if (guessDateTime && othersValueFormatDate != null)
            {
                var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            if (guessNumeric)
            {
                var res = GuessNumeric(samples, guessPercentage, false, cancellationToken);
                // NOTE(review): defensive null check, assuming GuessNumeric may return null when
                // cancelled like GuessDateTime does - confirm against its implementation
                if (res != null)
                {
                    if (res.FoundValueFormat != null)
                    {
                        return res;
                    }
                    checkResult.KeepBestPossibleMatch(res);
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // Minimum length of a date is 4 characters
            if (guessDateTime && firstValue.Length > 3)
            {
                var res = GuessDateTime(samples, checkNamedDates, cancellationToken);
                // GuessDateTime returns null when cancellation was requested; do not dereference
                // it, the cancellation check below then ends the guessing
                if (res != null)
                {
                    if (res.FoundValueFormat != null)
                    {
                        return res;
                    }
                    checkResult.KeepBestPossibleMatch(res);
                }
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            // if we have dates and allow serial dates, but do not guess numeric (this would be a fit) try
            // if the dates are all serial
            if (guessDateTime && serialDateTime && !guessNumeric)
            {
                var res = StringConversion.CheckSerialDate(samples, false);
                if (res.FoundValueFormat != null)
                {
                    return res;
                }
                checkResult.KeepBestPossibleMatch(res);
            }

            return checkResult;
        }