Exemplo n.º 1
0
        /// <summary>
        /// Returns the column names of a file, preferring a previously cached result over opening the file
        /// </summary>
        /// <param name="fileSetting">The file setting describing the file.</param>
        /// <param name="includeIgnored"><c>true</c> to list ignored columns as well; <c>false</c> to leave them out.</param>
        /// <param name="openIfNeeded"><c>true</c> to open the file when no cached header is available; if <c>false</c>
        /// and nothing is cached, <c>null</c> is returned.</param>
        /// <param name="processDisplay">The process display handed to the reader; may be <c>null</c>, in which case
        /// <see cref="CancellationToken.None" /> is used when opening.</param>
        /// <returns>
        /// The column names, or <c>null</c> if they are not cached and <paramref name="openIfNeeded" /> is <c>false</c>
        /// </returns>
        public static ICollection<string> GetColumnHeader(IFileSetting fileSetting, bool includeIgnored, bool openIfNeeded, IProcessDisplay processDisplay)
        {
            Contract.Requires(fileSetting != null);

            var cacheKey = CacheListKeyColumnHeader(fileSetting.ID, includeIgnored);
            // Only keys longer than three characters take part in caching
            var isCacheable = cacheKey.Length > 3;

            if (isCacheable && ApplicationSetting.CacheList.TryGet(cacheKey, out var cachedHeader))
            {
                return cachedHeader;
            }

            // Nothing cached and the caller does not want the file to be opened
            if (!openIfNeeded)
            {
                return null;
            }

            using (var reader = fileSetting.GetFileReader())
            {
                reader.ProcessDisplay = processDisplay;
                var token = processDisplay?.CancellationToken ?? CancellationToken.None;
                reader.Open(false, token);

                // Per the original note: if the key was long enough, the header is expected to have been
                // stored in the cache by now, so it is simply read back from there
                if (isCacheable)
                {
                    return ApplicationSetting.CacheList.Get(cacheKey);
                }

                // No usable cache key: collect the names straight from the reader
                var names = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                for (var index = 0; index < reader.FieldCount; index++)
                {
                    var column = reader.GetColumn(index);
                    if (!includeIgnored && column.Ignore)
                    {
                        continue;
                    }
                    names.Add(column.Name);
                }

                return names;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        ///   Gets the names of the columns of a file that do not contain any data
        /// </summary>
        /// <param name="fileSetting">The file setting.</param>
        /// <param name="processDisplay">The process display; its cancellation token is passed to the reader.</param>
        /// <returns>
        ///   An array with the names of the empty columns; the array is empty (never <c>null</c>) if the file
        ///   has no field header or no empty column was found
        /// </returns>
        /// <remarks>
        ///   For a <see cref="CsvFile" /> a column is reported when the reader gives it a size of 0. For any
        ///   other file setting at most the first 2000 rows are read and a column is reported when none of
        ///   these rows has a non-empty text in it.
        /// </remarks>
        public static string[] GetEmptyColumnHeader(IFileSetting fileSetting, IProcessDisplay processDisplay)
        {
            Contract.Requires(fileSetting != null);
            Contract.Requires(processDisplay != null);
            Contract.Ensures(Contract.Result<string[]>() != null);

            // Upper limit of rows inspected for files where the column size is not used
            const int maxRowsToCheck = 2000;

            var emptyColumns = new List<string>();

            // Without a header row there are no column names that could be reported
            if (!fileSetting.HasFieldHeader)
            {
                return emptyColumns.ToArray();
            }

            using (var fileReader = fileSetting.GetFileReader())
            {
                Contract.Assume(fileReader != null);
                fileReader.ProcessDisplay = processDisplay;
                // NOTE(review): the first argument presumably asks the reader to determine the column sizes
                // used below - confirm against the reader implementation
                fileReader.Open(true, processDisplay.CancellationToken);

                if (fileSetting is CsvFile)
                {
                    // Rely on the size the reader reports for each column
                    for (var column = 0; column < fileReader.FieldCount; column++)
                    {
                        var col = fileReader.GetColumn(column);
                        if (col.Size == 0)
                        {
                            emptyColumns.Add(col.Name);
                        }
                    }
                }
                else
                {
                    // Read rows and remember every column in which a value was seen
                    var columnHasData = new HashSet<int>();
                    for (var row = 0; row < maxRowsToCheck && fileReader.Read(); row++)
                    {
                        for (var column = 0; column < fileReader.FieldCount; column++)
                        {
                            if (columnHasData.Contains(column))
                            {
                                continue;
                            }

                            // A null value is treated like an empty text instead of raising a
                            // NullReferenceException on ToString()
                            var value = fileReader[column];
                            if (value != null && value.ToString().Length > 0)
                            {
                                columnHasData.Add(column);
                            }
                        }

                        // Every column has data, no need to look at further rows
                        if (columnHasData.Count == fileReader.FieldCount)
                        {
                            break;
                        }
                    }

                    for (var column = 0; column < fileReader.FieldCount; column++)
                    {
                        if (!columnHasData.Contains(column))
                        {
                            emptyColumns.Add(fileReader.GetName(column));
                        }
                    }
                }
            }

            return emptyColumns.ToArray();
        }
Exemplo n.º 3
0
        /// <summary>
        ///   Fills the Column Format for reader fileSettings
        /// </summary>
        /// <param name="fileSetting">The file setting to check, and fill</param>
        /// <param name="addTextColumns">if set to <c>true</c> even string columns are added.</param>
        /// <param name="processDisplay">The process display.</param>
        /// <returns>
        ///   A list of messages, one for each column format that was added or changed; empty if all
        ///   detection options are switched off or the file has no columns / no data
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="processDisplay" /> is <c>null</c></exception>
        public static IList<string> FillGuessColumnFormatReader(this IFileSetting fileSetting, bool addTextColumns,
                                                                IProcessDisplay processDisplay)
        {
            if (processDisplay == null)
            {
                throw new ArgumentNullException(nameof(processDisplay));
            }

            Contract.Requires(fileSetting != null);
            var result        = new List<string>();
            var guessSettings = ApplicationSetting.FillGuessSettings;

            // if we should not detect, we can finish
            if (!guessSettings.DetectBoolean && !guessSettings.DetectGUID &&
                !guessSettings.DectectNumbers &&
                !guessSettings.DetectDateTime &&
                !guessSettings.DectectPercentage &&
                !guessSettings.SerialDateTime)
            {
                return result;
            }

            var resetSkipRows = false;

            try
            {
                // Make sure that if we do have a CSV file without header that we will skip the first row that
                // might contain headers, but its simply set as without headers.
                if (fileSetting is CsvFile && !fileSetting.HasFieldHeader && fileSetting.SkipRows == 0)
                {
                    fileSetting.SkipRows = 1;
                    resetSkipRows        = true;
                }
                var othersValueFormatDate = CommonDateFormat(fileSetting.Column.Select(x => x.ValueFormat));

                using (var fileReader = fileSetting.GetFileReader())
                {
                    Contract.Assume(fileReader != null);
                    // fileReader.ProcessDisplay = processDisplay;

                    fileReader.Open(false, processDisplay.CancellationToken);
                    if (fileReader.FieldCount == 0 || fileReader.EndOfFile)
                    {
                        return result;
                    }
                    processDisplay.SetProcess("Getting column headers");
                    processDisplay.Maximum = fileReader.FieldCount;

                    // Step 1: look at sample values of every column and guess its format
                    var columnNamesInFile = new List<string>();
                    for (var colindex = 0; colindex < fileReader.FieldCount; colindex++)
                    {
                        var newColumn = fileReader.GetColumn(colindex);
                        Contract.Assume(newColumn != null);
                        columnNamesInFile.Add(newColumn.Name);
                        var oldColumn = fileSetting.GetColumn(newColumn.Name);

                        processDisplay.SetProcess(newColumn.Name + " – Getting values", colindex);

                        var samples = GetSampleValues(fileReader, guessSettings.CheckedRecords,
                                                      colindex, guessSettings.SampleValues, fileSetting.TreatTextAsNull,
                                                      processDisplay.CancellationToken);

                        if (samples.IsEmpty())
                        {
                            processDisplay.SetProcess(newColumn.Name + " – No values found", colindex);
                            if (!addTextColumns)
                            {
                                continue;
                            }
                            result.Add($"{newColumn.Name} – No values found – Format : {newColumn.GetTypeAndFormatDescription()}");
                            fileSetting.ColumnAdd(newColumn);
                        }
                        else
                        {
                            // Columns that look like an ID are excluded from detection if configured that way
                            var detect = !(guessSettings.IgnoreIdColums &&
                                           StringUtils.AssumeIDColumn(newColumn.Name) > 0);

                            if (samples.Count < 10)
                            {
                                processDisplay.SetProcess($"{newColumn.Name} – Only {samples.Count} values found in {guessSettings.CheckedRecords} rows", colindex);
                            }
                            else
                            {
                                processDisplay.SetProcess($"{newColumn.Name} – {samples.Count} values found – Examining format", colindex);
                            }

                            var checkResult = GuessValueFormat(samples, guessSettings.MinSamplesForIntDate,
                                                               guessSettings.TrueValue,
                                                               guessSettings.FalseValue,
                                                               guessSettings.DetectBoolean && detect,
                                                               guessSettings.DetectGUID && detect,
                                                               guessSettings.DectectNumbers && detect,
                                                               guessSettings.DetectDateTime && detect,
                                                               guessSettings.DectectPercentage && detect,
                                                               guessSettings.SerialDateTime && detect,
                                                               guessSettings.CheckNamedDates && detect,
                                                               othersValueFormatDate,
                                                               processDisplay.CancellationToken);

                            if (checkResult == null)
                            {
                                // Nothing was detected: skip the column unless text columns are wanted,
                                // in which case a default value format is used
                                if (!addTextColumns)
                                {
                                    continue;
                                }

                                checkResult = new CheckResult { FoundValueFormat = new ValueFormat() };
                            }

                            // if we have a mapping to a template that expects a integer and we only have integers but not enough...
                            if (oldColumn != null)
                            {
                                var oldValueFormat = oldColumn.GetTypeAndFormatDescription();

                                // if we have a date value format already store this
                                if (othersValueFormatDate == null && checkResult.FoundValueFormat.DataType == DataType.DateTime && checkResult.PossibleMatch)
                                {
                                    othersValueFormatDate = checkResult.FoundValueFormat;
                                }

                                if (checkResult.FoundValueFormat.Equals(oldColumn.ValueFormat))
                                {
                                    processDisplay.SetProcess($"{newColumn.Name} – Format : {oldValueFormat} – not changed",
                                                              colindex);
                                }
                                else
                                {
                                    oldColumn.ValueFormat = checkResult.FoundValueFormat;
                                }

                                // Only report the column if the textual description really changed
                                var newValueFormat = checkResult.FoundValueFormat.GetTypeAndFormatDescription();
                                if (oldValueFormat.Equals(newValueFormat, StringComparison.Ordinal))
                                {
                                    continue;
                                }
                                var msg = $"{newColumn.Name} – Format : {newValueFormat} – updated from {oldValueFormat}";
                                result.Add(msg);
                                processDisplay.SetProcess(msg, colindex);
                            }
                            else
                            {
                                if (!addTextColumns && checkResult.FoundValueFormat.DataType == DataType.String)
                                {
                                    continue;
                                }
                                newColumn.ValueFormat = checkResult.FoundValueFormat;
                                var msg = $"{newColumn.Name} – Format : {newColumn.GetTypeAndFormatDescription()}";
                                processDisplay.SetProcess(msg, colindex);
                                result.Add(msg);
                                fileSetting.ColumnAdd(newColumn);
                            }
                        }
                    }

                    // The fileReader does not have the column information yet, let the reader know
                    fileReader.OverrideColumnFormatFromSetting(fileReader.FieldCount);

                    // Step 2: in case its Excel, check all doubles if they could be integer
                    if (fileSetting is IExcelFile)
                    {
                        for (var colindex = 0; colindex < fileReader.FieldCount; colindex++)
                        {
                            var oldColumn = fileReader.GetColumn(colindex);

                            // Check for null before the column name is accessed; only double columns are of interest
                            if (oldColumn == null || oldColumn.DataType != DataType.Double)
                            {
                                continue;
                            }

                            var detect = !(guessSettings.IgnoreIdColums &&
                                           StringUtils.AssumeIDColumn(oldColumn.Name) > 0);

                            Column newColumn = null;

                            if (detect)
                            {
                                var samples = GetSampleValues(fileReader, guessSettings.CheckedRecords,
                                                              colindex, guessSettings.SampleValues, fileSetting.TreatTextAsNull,
                                                              processDisplay.CancellationToken);

                                if (!samples.IsEmpty())
                                {
                                    var checkResult = GuessNumeric(samples, false, true, processDisplay.CancellationToken);
                                    if (checkResult != null && checkResult.FoundValueFormat.DataType != DataType.Double)
                                    {
                                        newColumn = fileSetting.GetColumn(oldColumn.Name);
                                        if (newColumn == null)
                                        {
                                            newColumn = fileSetting.ColumnAdd(oldColumn);
                                        }

                                        newColumn.DataType = checkResult.FoundValueFormat.DataType;
                                    }
                                }
                            }
                            else
                            {
                                // Column is excluded from detection (ID column): it is set to text
                                newColumn = fileSetting.GetColumn(oldColumn.Name);
                                if (newColumn == null)
                                {
                                    newColumn = fileSetting.ColumnAdd(oldColumn);
                                }
                                newColumn.DataType = DataType.String;
                            }

                            if (newColumn != null)
                            {
                                var msg = $"{newColumn.Name} – Overwritten Excel Format : {newColumn.GetTypeAndFormatDescription()}";
                                processDisplay.SetProcess(msg, colindex);
                                result.Add(msg);
                            }
                        }
                    }

                    // Step 3: combine date columns with matching time zone / time columns
                    if (guessSettings.DateParts)
                    {
                        // Try to find a time for a date if the date does not already have a time
                        // Case a) TimeFormat has already been recognized
                        for (var colindex = 0; colindex < fileReader.FieldCount; colindex++)
                        {
                            var columnDate = fileReader.GetColumn(colindex);

                            // Possibly add Time Zone
                            if (columnDate.DataType == DataType.DateTime && string.IsNullOrEmpty(columnDate.TimeZonePart))
                            {
                                for (var coltimeZone = 0; coltimeZone < fileReader.FieldCount; coltimeZone++)
                                {
                                    var columnTimeZone = fileReader.GetColumn(coltimeZone);
                                    var colName        = columnTimeZone.Name.NoSpecials().ToUpperInvariant();

                                    // Skip columns that are neither text nor integer, or that do not have a time zone name
                                    if ((columnTimeZone.DataType != DataType.String && columnTimeZone.DataType != DataType.Integer) ||
                                        (colName != "TIMEZONE" && colName != "TIMEZONEID" && colName != "TIME ZONE" &&
                                         colName != "TIME ZONE ID"))
                                    {
                                        continue;
                                    }

                                    columnDate.TimeZonePart = columnTimeZone.Name;
                                    result.Add($"{columnDate.Name} – Added Time Zone : {columnTimeZone.Name}");
                                }
                            }

                            // Only date columns without a time part and without time information in the format go on
                            if (columnDate.DataType != DataType.DateTime || !string.IsNullOrEmpty(columnDate.TimePart) ||
                                columnDate.ValueFormat.DateFormat.IndexOfAny(new[] { ':', 'h', 'H', 'm', 's', 't' }) != -1)
                            {
                                continue;
                            }
                            // We have a date column without time
                            for (var coltime = 0; coltime < fileReader.FieldCount; coltime++)
                            {
                                var columnTime = fileReader.GetColumn(coltime);
                                // A time column is a DateTime column whose format has no date information;
                                // once a time part has been assigned to the date no further column is used
                                if (columnTime.DataType != DataType.DateTime || !string.IsNullOrEmpty(columnDate.TimePart) ||
                                    columnTime.ValueFormat.DateFormat.IndexOfAny(new[] { '/', 'y', 'M', 'd' }) != -1)
                                {
                                    continue;
                                }
                                // We now have a time column,
                                // checked if the names somehow make sense
                                if (!columnDate.Name.NoSpecials().ToUpperInvariant().Replace("DATE", string.Empty).Equals(columnTime.Name.NoSpecials().ToUpperInvariant().Replace("TIME", string.Empty), StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                columnDate.TimePart       = columnTime.Name;
                                columnDate.TimePartFormat = columnTime.ValueFormat.DateFormat;
                                result.Add($"{columnDate.Name} – Added Time Part : {columnTime.Name}");
                            }
                        }

                        // Case b) TimeFormat has not been recognized (e.G. all values are 08:00) only look in adjacent fields
                        for (var colindex = 0; colindex < fileReader.FieldCount; colindex++)
                        {
                            var columnDate = fileReader.GetColumn(colindex);
                            if (columnDate.DataType != DataType.DateTime || !string.IsNullOrEmpty(columnDate.TimePart) ||
                                columnDate.ValueFormat.DateFormat.IndexOfAny(new[] { ':', 'h', 'H', 'm', 's', 't' }) != -1)
                            {
                                continue;
                            }

                            // Look at the column following the date first
                            if (colindex + 1 < fileReader.FieldCount)
                            {
                                var columnTime = fileReader.GetColumn(colindex + 1);
                                if (columnTime.DataType == DataType.String && columnDate.Name.NoSpecials().ToUpperInvariant()
                                    .Replace("DATE", string.Empty)
                                    .Equals(columnTime.Name.NoSpecials().ToUpperInvariant().Replace("TIME", string.Empty),
                                            StringComparison.OrdinalIgnoreCase))
                                {
                                    columnDate.TimePart = columnTime.Name;
                                    {
                                        var samples = GetSampleValues(fileReader, 1, colindex + 1, 1, fileSetting.TreatTextAsNull,
                                                                      processDisplay.CancellationToken);
                                        var first = samples.FirstOrDefault();
                                        if (first != null)
                                        {
                                            // A text of 8 characters is taken as HH:mm:ss, one of 5 characters as HH:mm
                                            if (first.Length == 8 || first.Length == 5)
                                            {
                                                columnTime.DataType = DataType.DateTime;
                                                var val = new ValueFormat(DataType.DateTime)
                                                {
                                                    DateFormat = first.Length == 8 ? "HH:mm:ss" : "HH:mm"
                                                };
                                                columnTime.ValueFormat = val;
                                                fileSetting.ColumnAdd(columnTime);
                                                result.Add($"{columnTime.Name} – Format : {columnTime.GetTypeAndFormatDescription()}");
                                            }
                                        }
                                    }

                                    result.Add($"{columnDate.Name} – Added Time Part : {columnTime.Name}");
                                    continue;
                                }
                            }

                            // Otherwise look at the column in front of the date, if there is one
                            if (colindex <= 0)
                            {
                                continue;
                            }
                            {
                                var columnTime = fileReader.GetColumn(colindex - 1);
                                if (columnTime.DataType != DataType.String ||
                                    !columnDate.Name.NoSpecials().ToUpperInvariant().Replace("DATE", string.Empty).Equals(columnTime.Name.NoSpecials().ToUpperInvariant().Replace("TIME", string.Empty), StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                columnDate.TimePart = columnTime.Name;
                                {
                                    var samples = GetSampleValues(fileReader, 1, colindex - 1, 1, fileSetting.TreatTextAsNull,
                                                                  processDisplay.CancellationToken);
                                    var first = samples.FirstOrDefault();
                                    if (first != null)
                                    {
                                        if (first.Length == 8 || first.Length == 5)
                                        {
                                            var val = new ValueFormat(DataType.DateTime)
                                            {
                                                DateFormat = first.Length == 8 ? "HH:mm:ss" : "HH:mm"
                                            };
                                            // NOTE(review): unlike the following-column case above, the column is added
                                            // before its format is set and DataType is not assigned explicitly - confirm
                                            // this difference is intended
                                            fileSetting.ColumnAdd(columnTime);
                                            columnTime.ValueFormat = val;
                                            result.Add($"{columnTime.Name} – Format : {columnTime.GetTypeAndFormatDescription()}");
                                        }
                                    }
                                }
                                result.Add($"{columnDate.Name} – Added Time Part : {columnTime.Name}");
                            }
                        }
                    }

                    // Sort the columns in fileSetting by order in file
                    fileSetting.SortColumnByName(columnNamesInFile);
                }
            }
            finally
            {
                // Undo the temporary skipping of the first row
                if (resetSkipRows)
                {
                    fileSetting.SkipRows = 0;
                }
            }

            return result;
        }