/// <summary>
/// Checks that every key of <c>StringConversion.StandardDateTimeFormats</c> is recognized by
/// <c>StringConversion.CheckDate</c> when the sample values were rendered with the en-US culture
/// and "/" as date separator.
/// </summary>
public void IdentifyDatesFormatsUS()
{
    const string dateSep = "/";
    var culture = new CultureInfo("en-US");

    // Try the date formats
    foreach (var fmt in StringConversion.StandardDateTimeFormats.Keys)
    {
        // Fill samples: 1 month x 5 days x 2 hours x 2 minutes. The set removes duplicates
        // produced by formats that do not render all of these components.
        var samples = new HashSet<string>();
        for (var month = 9; month < 10; month++)
        {
            for (var day = 10; day < 15; day++)
            {
                for (var hrs = 11; hrs < 13; hrs++)
                {
                    for (var min = 24; min < 26; min++)
                    {
                        samples.Add(new DateTime(2010, month, day, hrs, min, 10, 876, DateTimeKind.Local).ToString(fmt, culture));
                    }
                }
            }
        }

        // Parse with the same culture that rendered the samples. Using the current culture
        // here would make the outcome depend on the regional settings of the test machine.
        var result = StringConversion.CheckDate(samples, fmt, dateSep, ":", culture);

        Assert.IsNotNull(
            result.FoundValueFormat,
            $"Test format {fmt}\nFirst not matching: {samples.First()}");
    }
}
/// <summary>
/// Tries the standard date/time formats that match the length of the first sample and returns
/// the first check result that identified a format.
/// </summary>
/// <param name="samples">The sample texts; must contain at least one entry.</param>
/// <param name="checkNamedDates">Passed on to <c>MatchingforLength</c> when selecting candidate formats.</param>
/// <param name="cancellationToken">A cancellation token, polled once per candidate format.</param>
/// <returns>
/// <c>null</c> if cancellation was requested; the first result with a non-null
/// <c>FoundValueFormat</c>; otherwise a result that was passed every failed attempt through
/// <c>KeepBestPossibleMatch</c>.
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="samples" /> is null or empty.</exception>
public static CheckResult GuessDateTime(IList<string> samples, bool checkNamedDates, CancellationToken cancellationToken)
{
    if (samples == null || samples.IsEmpty())
    {
        throw new ArgumentNullException(nameof(samples));
    }

    var bestSoFar = new CheckResult();
    var firstSample = samples[0];
    var candidates = StringConversion.StandardDateTimeFormats.MatchingforLength(firstSample.Length, checkNamedDates);

    foreach (var format in candidates)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            return null;
        }

        if (fmtHasNoSeparatorPlaceholder(format))
        {
            // No '/' after the first character: a single attempt with the current culture's separator
            var attempt = StringConversion.CheckDate(
                samples,
                format,
                CultureInfo.CurrentCulture.DateTimeFormat.DateSeparator,
                ":",
                CultureInfo.CurrentCulture);
            if (attempt.FoundValueFormat != null)
            {
                return attempt;
            }

            bestSoFar.KeepBestPossibleMatch(attempt);
            continue;
        }

        // The format has a '/' placeholder: try every known date separator
        foreach (var separator in StringConversion.DateSeparators)
        {
            var attempt = StringConversion.CheckDate(samples, format, separator, ":", CultureInfo.CurrentCulture);
            if (attempt.FoundValueFormat != null)
            {
                return attempt;
            }

            bestSoFar.KeepBestPossibleMatch(attempt);
        }
    }

    return bestSoFar;
}

/// <summary>
/// Returns <c>true</c> when the format has no '/' at index 1 or later.
/// </summary>
private static bool fmtHasNoSeparatorPlaceholder(string format)
{
    return format.IndexOf('/') <= 0;
}
/// <summary>
/// Guesses the value format of the given samples. The checks run in this order: boolean,
/// GUID, plain text heuristic, previously found date format, "yyyyMMdd", serial date,
/// numeric, generic date/time and finally serial date without numeric guessing.
/// </summary>
/// <param name="samples">The samples.</param>
/// <param name="minRequiredSamples">The minimum required samples.</param>
/// <param name="trueValue">The text to be regarded as <c>true</c></param>
/// <param name="falseValue">The text to be regarded as <c>false</c></param>
/// <param name="guessBoolean">Try to identify a boolean</param>
/// <param name="guessGuid">Try to determine if its a GUID</param>
/// <param name="guessNumeric">Try to determine if its a Number</param>
/// <param name="guessDateTime">Try to determine if it is a date time</param>
/// <param name="guessPercentage">Accept percentage values</param>
/// <param name="serialDateTime">Allow serial Date time</param>
/// <param name="checkNamedDates">if set to <c>true</c> [check named dates].</param>
/// <param name="othersValueFormatDate">
/// A date format that is tried before the generic date detection; may be <c>null</c>.
/// Presumably the format found for other columns or files - confirm with the callers.
/// </param>
/// <param name="cancellationToken">A cancellation token</param>
/// <returns>
/// <c>null</c> if there are no samples, if cancellation was requested, or if there are fewer
/// than <paramref name="minRequiredSamples" /> samples and none of the early checks matched;
/// otherwise a <see cref="CheckResult" />.
/// </returns>
public static CheckResult GuessValueFormat(IList<string> samples, int minRequiredSamples, string trueValue, string falseValue, bool guessBoolean, bool guessGuid, bool guessNumeric, bool guessDateTime, bool guessPercentage, bool serialDateTime, bool checkNamedDates, ValueFormat othersValueFormatDate, CancellationToken cancellationToken)
{
    Contract.Requires(samples != null);
    if (samples.IsEmpty())
    {
        return null;
    }

    // IList<T> exposes Count directly, no need for the LINQ extension
    var count = samples.Count;
    var checkResult = new CheckResult { FoundValueFormat = new ValueFormat() };

    // If there is only one sample value and it is the false value, assume it is a boolean.
    // The static string.Equals tolerates a null sample.
    if (guessBoolean && count == 1 && !string.IsNullOrEmpty(falseValue)
        && string.Equals(samples[0], falseValue, StringComparison.OrdinalIgnoreCase))
    {
        checkResult.FoundValueFormat.DataType = DataType.Boolean;
        return checkResult;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // With one or two distinct values this could be a boolean
    if (guessBoolean && count <= 2)
    {
        var allParsed = true;
        string usedTrueValue = null;
        string usedFalseValue = null;
        foreach (var value in samples)
        {
            var result = StringConversion.StringToBooleanStrict(value, trueValue, falseValue);
            if (result == null)
            {
                allParsed = false;
                break;
            }

            if (result.Item1)
            {
                usedTrueValue = result.Item2;
            }
            else
            {
                usedFalseValue = result.Item2;
            }
        }

        if (allParsed)
        {
            checkResult.FoundValueFormat.DataType = DataType.Boolean;
            if (!string.IsNullOrEmpty(usedTrueValue))
            {
                checkResult.FoundValueFormat.True = usedTrueValue;
            }

            if (!string.IsNullOrEmpty(usedFalseValue))
            {
                checkResult.FoundValueFormat.False = usedFalseValue;
            }

            return checkResult;
        }
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    if (guessGuid && StringConversion.CheckGuid(samples))
    {
        checkResult.FoundValueFormat.DataType = DataType.Guid;
        return checkResult;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // In case we have named dates, this heuristic is not feasible
    if (!checkNamedDates)
    {
        // Letters that indicate regular text. A, M, P and T are left out as they might be part
        // of a date, E is left out as it might be part of a number.
        // Letter frequencies: u 1.487%, o 6.264%, n 2.365%, i 6.286%, h 7.232%, s 6.327%.
        // This adds up to a roughly 30% chance for each position of a regular text.
        // Created once instead of once per sample.
        var textIndicators = new[] { 'u', 'U', 'o', 'O', 'i', 'I', 'n', 'N', 's', 'S', 'h', 'H' };
        var valuesWithChars = 0;
        foreach (var value in samples)
        {
            // A null sample has no characters to inspect
            if (value == null || value.IndexOfAny(textIndicators) < 0)
            {
                continue;
            }

            valuesWithChars++;

            // Decide for text once half of the samples (integer division) or ten samples
            // contained one of the letters
            if (valuesWithChars < count / 2 && valuesWithChars < 10)
            {
                continue;
            }

            checkResult.FoundValueFormat.DataType = DataType.String;
            return checkResult;
        }
    }

    // Too few samples for guessing, but a known date format can still be verified
    if (count < minRequiredSamples && guessDateTime && othersValueFormatDate != null)
    {
        var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
        if (res.FoundValueFormat != null)
        {
            return res;
        }
    }

    // If we have less than the required sample values do not try to get a type
    if (count < minRequiredSamples || cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    var firstValue = samples[0];

    // Guess a date format that could be interpreted as number before testing numbers
    if (guessDateTime && firstValue.Length == 8)
    {
        var res = StringConversion.CheckDate(samples, "yyyyMMdd", string.Empty, ":", CultureInfo.InvariantCulture);
        if (res.FoundValueFormat != null)
        {
            return res;
        }

        checkResult.KeepBestPossibleMatch(res);
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // We need more than 10 sample values here, otherwise it is too dangerous to assume a date
    if (guessDateTime && serialDateTime && count > 10 && count > minRequiredSamples)
    {
        var res = StringConversion.CheckSerialDate(samples, true);
        if (res.FoundValueFormat != null)
        {
            return res;
        }

        checkResult.KeepBestPossibleMatch(res);
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // Assume dates are of the same format across the files: check if the dates we have would
    // match the known format, no matter how many samples we have
    if (guessDateTime && othersValueFormatDate != null)
    {
        var res = StringConversion.CheckDate(samples, othersValueFormatDate.DateFormat, othersValueFormatDate.DateSeparator, othersValueFormatDate.TimeSeparator, CultureInfo.CurrentCulture);
        if (res.FoundValueFormat != null)
        {
            return res;
        }
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    if (guessNumeric)
    {
        // GuessNumeric receives the token; guard against a null result (as GuessDateTime
        // returns on cancellation). The cancellation check below then ends the method.
        var res = GuessNumeric(samples, guessPercentage, false, cancellationToken);
        if (res != null)
        {
            if (res.FoundValueFormat != null)
            {
                return res;
            }

            checkResult.KeepBestPossibleMatch(res);
        }
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // Minimum length of a date is 4 characters
    if (guessDateTime && firstValue.Length > 3)
    {
        // GuessDateTime returns null when cancelled; do not dereference in that case,
        // the cancellation check below returns null
        var res = GuessDateTime(samples, checkNamedDates, cancellationToken);
        if (res != null)
        {
            if (res.FoundValueFormat != null)
            {
                return res;
            }

            checkResult.KeepBestPossibleMatch(res);
        }
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return null;
    }

    // If we have dates and allow serial dates, but do not guess numeric (this would be a fit),
    // try if the dates are all serial
    if (guessDateTime && serialDateTime && !guessNumeric)
    {
        var res = StringConversion.CheckSerialDate(samples, false);
        if (res.FoundValueFormat != null)
        {
            return res;
        }

        checkResult.KeepBestPossibleMatch(res);
    }

    return checkResult;
}