示例#1
0
        /// <summary>
        ///   Checks whether the file most likely is NOT a delimited text file.
        /// </summary>
        /// <param name="setting"><see cref="ICsvFile" /> with the information</param>
        /// <returns>
        ///   <c>true</c> if this is most likely not a delimited file; <c>false</c> as soon as one
        ///   candidate separator is found in at least 9/10 of <c>LastRow</c> rows.
        /// </returns>
        /// <remarks>
        ///   The first <c>setting.SkipRows</c> lines are read and discarded before the delimiter
        ///   statistics are collected through <c>GetDelimiterCounter</c>.
        /// </remarks>
        public static bool GuessNotADelimitedFile(ICsvFile setting)
        {
            Contract.Requires(setting != null);
            using (var improvedStream = ImprovedStream.OpenRead(setting))
            using (var streamReader = new StreamReader(improvedStream.Stream, setting.GetEncoding(), setting.ByteOrderMark))
            {
                for (int i = 0; i < setting.SkipRows; i++)
                {
                    streamReader.ReadLine();
                }

                // NOTE(review): '\0' is passed as the escape character and 300 presumably limits the
                // number of inspected rows - confirm against GetDelimiterCounter.
                var dc = GetDelimiterCounter(streamReader, '\0', 300);

                // A separator present in (nearly) every row means the file does have a proper delimiter
                for (var sep = 0; sep < dc.Separators.Length; sep++)
                {
                    if (dc.SeparatorRows[sep] >= dc.LastRow * 9 / 10)
                    {
                        // Returning false means "it IS delimited"; the log text now agrees with that
                        Log.Info("Delimited file");
                        return false;
                    }
                }
            }

            // No candidate separator was consistent enough
            Log.Info("Not a delimited file");
            return true;
        }
 /// <summary>
 ///   Creates the adapter and keeps the supplied collaborators in its private fields.
 /// </summary>
 /// <param name="csvFile">The CSV file description, stored in <c>_csvFile</c>.</param>
 /// <param name="parser">The parser, stored in <c>_parser</c>.</param>
 /// <param name="dataTypeDetector">The data type detector, stored in <c>_dataTypeDetector</c>.</param>
 public CsvFileDataAdapter(ICsvFile csvFile, ICsvFileParser parser, IDataTypeDetector dataTypeDetector)
 {
     // The three assignments are independent of each other
     _dataTypeDetector = dataTypeDetector;
     _parser = parser;
     _csvFile = csvFile;
 }
示例#3
0
        /// <summary>
        ///   Creates the form for the given file: binds the file and its format to the binding
        ///   sources, opens an <c>ImprovedStream</c> on it and refreshes the highlighting.
        /// </summary>
        /// <param name="csvFile">The file setting that is shown and bound.</param>
        public FindSkipRows(ICsvFile csvFile)
        {
            InitializeComponent();

            // Remember the setting and wire it up to the data bindings
            fileSetting = csvFile;
            fileSettingBindingSource.DataSource = csvFile;
            var format = csvFile.FileFormat;
            fileFormatBindingSource.DataSource = format;

            // Open the source once; the stream is kept in m_Stream
            var access = new SourceAccess(csvFile);
            m_Stream = new ImprovedStream(access);

            UpdateHighlight();
        }
示例#4
0
 /// <summary>
 ///   Opens the file described by the setting and delegates to the reader based
 ///   <c>GuessStartRow</c> overload to find the first data row.
 /// </summary>
 /// <param name="setting"><see cref="ICsvFile" /> with the information</param>
 /// <returns>
 ///   The number of rows to skip
 /// </returns>
 public static int GuessStartRow(ICsvFile setting)
 {
     Contract.Requires(setting != null);

     using (var improvedStream = ImprovedStream.OpenRead(setting))
     {
         // The byte order mark flag is read after GetEncoding(), same as in the original call
         var stream = improvedStream.Stream;
         var encoding = setting.GetEncoding();
         var detectBom = setting.ByteOrderMark;

         using (var streamReader = new StreamReader(stream, encoding, detectBom))
         {
             var delimiter = setting.FileFormat.FieldDelimiterChar;
             var qualifier = setting.FileFormat.FieldQualifierChar;
             return GuessStartRow(streamReader, delimiter, qualifier);
         }
     }
 }
示例#5
0
        /// <summary>
        ///   Gets the <see cref="Encoding"/> for the text file described by the setting.
        /// </summary>
        /// <param name="setting">The setting.</param>
        /// <returns>
        ///   The encoding for <c>setting.CodePageId</c>; a negative code page id triggers
        ///   <c>GuessCodePage</c> first.
        /// </returns>
        public static Encoding GetEncoding(this ICsvFile setting)
        {
            Contract.Requires(setting != null);

            var codePageKnown = setting.CodePageId >= 0;
            if (!codePageKnown)
            {
                // Detection stores its result in the setting
                GuessCodePage(setting);
            }

            var codePage = setting.CodePageId;
            return Encoding.GetEncoding(codePage);
        }
示例#6
0
 /// <summary>
 ///   Opens the file, discards the configured number of leading lines and delegates to the
 ///   reader based <c>GuessNewline</c> overload.
 /// </summary>
 /// <param name="setting"><see cref="ICsvFile" /> with the information</param>
 /// <returns>The NewLine Combination used</returns>
 public static string GuessNewline(ICsvFile setting)
 {
     Contract.Requires(setting != null);

     using (var improvedStream = ImprovedStream.OpenRead(setting))
     {
         // The byte order mark flag is read after GetEncoding(), same as in the original call
         var stream = improvedStream.Stream;
         var encoding = setting.GetEncoding();
         var detectBom = setting.ByteOrderMark;

         using (var streamReader = new StreamReader(stream, encoding, detectBom))
         {
             // Skip the rows that are not part of the data
             var skipped = 0;
             while (skipped < setting.SkipRows)
             {
                 streamReader.ReadLine();
                 skipped++;
             }

             var qualifier = setting.FileFormat.FieldQualifierChar;
             return GuessNewline(streamReader, qualifier);
         }
     }
 }
示例#7
0
        /// <summary>
        ///   Opens the csv file, and tries to decide whether the first row can be used as header
        /// </summary>
        /// <param name="setting">The CSVFile fileSetting</param>
        /// <param name="cancellationToken">Cancellation token handed to the reader when opening the file.</param>
        /// <returns>
        ///   <c>True</c> we could use the first row as header, <c>false</c> should not use first row as header
        /// </returns>
        /// <remarks>
        ///   The header is rejected when the setting already has <c>HasFieldHeader</c> switched off,
        ///   when at least half of the columns only carry a generated default name, when a column
        ///   name parses as a number, or when a column name is longer than 80 characters.
        /// </remarks>
        public static bool GuessHasHeader(ICsvFile setting, CancellationToken cancellationToken)
        {
            Contract.Requires(setting != null);
            // Only do so if HasFieldHeader is still true
            if (!setting.HasFieldHeader)
            {
                Log.Info("Without Header Row");
                return false;
            }

            using (var csvDataReader = new CsvFileReader(setting))
            {
                csvDataReader.Open(false, cancellationToken);

                var fieldCount = csvDataReader.FieldCount;
                // Half of the columns, rounded up
                var defaultNameLimit = (int)Math.Ceiling(fieldCount / 2.0);
                var defaultNames = 0;

                // In addition check that all columns have real names and did not get an artificial name
                // or are numbers
                for (var counter = 0; counter < fieldCount; counter++)
                {
                    var columnName = csvDataReader.GetName(counter);

                    // A name replaced by a default counts against a header. Pre-increment so the
                    // limit is hit when the count REACHES it; the former post-increment needed one
                    // extra default name and could never trigger for a single-column file.
                    if (columnName.Equals(BaseFileReader.GetDefaultName(counter), StringComparison.OrdinalIgnoreCase)
                        && ++defaultNames >= defaultNameLimit)
                    {
                        Log.Info("Without Header Row");
                        return false;
                    }

                    // if its a number assume no headers
                    if (StringConversion.StringToDecimal(columnName, '.', ',', false).HasValue)
                    {
                        Log.Info("Without Header Row");
                        return false;
                    }

                    // if its rather long assume no header
                    if (columnName.Length > 80)
                    {
                        Log.Info("Without Header Row");
                        return false;
                    }
                }

                // None of the checks rejected the first row
                Log.Info("With Header Row");
                return true;
            }
        }
示例#8
0
 /// <summary>
 ///   Opens the file, discards the configured number of leading lines and delegates to the
 ///   reader based <c>GuessDelimiter</c> overload, passing the escape character of the file format.
 /// </summary>
 /// <param name="setting">The CSVFile fileSetting</param>
 /// <returns>
 ///   The assumed delimiter for the file; the contract states the result is never <c>null</c>
 /// </returns>
 public static string GuessDelimiter(ICsvFile setting)
 {
     Contract.Requires(setting != null);
     Contract.Ensures(Contract.Result <string>() != null);

     using (var improvedStream = ImprovedStream.OpenRead(setting))
     {
         // The byte order mark flag is read after GetEncoding(), same as in the original call
         var stream = improvedStream.Stream;
         var encoding = setting.GetEncoding();
         var detectBom = setting.ByteOrderMark;

         using (var streamReader = new StreamReader(stream, encoding, detectBom))
         {
             // Skip the rows that are not part of the data
             var skipped = 0;
             while (skipped < setting.SkipRows)
             {
                 streamReader.ReadLine();
                 skipped++;
             }

             var escapeChar = setting.FileFormat.EscapeCharacterChar;
             return GuessDelimiter(streamReader, escapeChar);
         }
     }
 }
示例#9
0
        /// <summary>
        ///   Initializes a new instance of the <see cref="CsvFileReader" /> class and validates the
        ///   file name, field delimiter, escape character and field qualifier of the setting.
        /// </summary>
        /// <param name="fileSetting">The setting describing the delimited file to read.</param>
        /// <exception cref="ApplicationException">
        ///   The file name is empty, the field delimiter is CR, LF, space or '\0', the escape character
        ///   equals the field delimiter, or the field qualifier is CR or LF.
        /// </exception>
        /// <exception cref="FileNotFoundException">
        ///   A remote file handler is registered, the setting has no remote file name and the file
        ///   at <c>FullPath</c> does not exist.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///   The field qualifier and the field delimiter are the same character.
        /// </exception>
        public CsvFileReader(ICsvFile fileSetting)
            : base(fileSetting)
        {
            m_CsvFile = fileSetting;
            if (string.IsNullOrEmpty(m_CsvFile.FileName))
            {
                throw new ApplicationException("FileName must be set");
            }

            // m_CsvFile has already been dereferenced above, so no null-conditional is needed here
            if (ApplicationSetting.RemoteFileHandler != null && string.IsNullOrEmpty(m_CsvFile.RemoteFileName))
            {
                if (!FileSystemUtils.FileExists(m_CsvFile.FullPath))
                {
                    throw new FileNotFoundException(
                              $"The file '{FileSystemUtils.GetShortDisplayFileName(m_CsvFile.FileName, 80)}' does not exist or is not accessible.", m_CsvFile.FileName);
                }
            }
            if (m_CsvFile.FileFormat.FieldDelimiterChar == c_Cr ||
                m_CsvFile.FileFormat.FieldDelimiterChar == c_Lf ||
                m_CsvFile.FileFormat.FieldDelimiterChar == ' ' ||
                m_CsvFile.FileFormat.FieldDelimiterChar == '\0')
            {
                throw new ApplicationException(
                          "The field delimiter character is invalid, please use something else than CR, LF or Space");
            }

            if (m_CsvFile.FileFormat.FieldDelimiterChar == m_CsvFile.FileFormat.EscapeCharacterChar)
            {
                throw new ApplicationException(
                          $"The escape character is invalid, please use something else than the field delimiter character {FileFormat.GetDescription(m_CsvFile.FileFormat.EscapeCharacter)}.");
            }

            m_HasQualifier |= m_CsvFile.FileFormat.FieldQualifierChar != '\0';

            // The remaining checks only apply when a qualifier is in use
            if (!m_HasQualifier)
            {
                return;
            }
            if (m_CsvFile.FileFormat.FieldQualifierChar == m_CsvFile.FileFormat.FieldDelimiterChar)
            {
                // The single-string constructor treats its argument as the parameter NAME;
                // pass the name and the message separately so the message is reported as such.
                throw new ArgumentOutOfRangeException(
                          nameof(fileSetting),
                          $"The text quoting and the field delimiter characters of a delimited file cannot be the same. {m_CsvFile.FileFormat.FieldDelimiterChar}");
            }
            if (m_CsvFile.FileFormat.FieldQualifierChar == c_Cr || m_CsvFile.FileFormat.FieldQualifierChar == c_Lf)
            {
                throw new ApplicationException(
                          "The text quoting characters is invalid, please use something else than CR or LF");
            }
        }
示例#10
0
        /// <summary>
        ///   Builds the function that turns a raw field value into its (optionally qualified)
        ///   textual representation for the given file configuration.
        /// </summary>
        /// <param name="fileConfig">
        ///   Configuration providing qualifier, delimiter, <c>ForceQualifier</c>, <c>FixForExcel</c>
        ///   and <c>SurroundedQualifier</c>.
        /// </param>
        /// <param name="log">Logger handed on to <c>getReplaceQualifierFunc</c>.</param>
        /// <returns>
        ///   A function taking the value and its column type. With <c>ForceQualifier</c> or
        ///   <c>FixForExcel</c> every value is wrapped in qualifiers (for Excel additionally as
        ///   <c>"=..."</c>); otherwise a value is only wrapped when it contains the qualifier or the
        ///   delimiter. A <c>null</c> value is returned as <c>null</c> in the non-forced case.
        /// </returns>
        private static Func <string, ColumnType, string> GetQualifiedStrFunc(this ICsvFile fileConfig, ILog log)
        {
            // Only the first character of the configured qualifier / delimiter is used
            var qualifier = fileConfig.Qualifier?.Length > 0 ? fileConfig.Qualifier[0].ToString() : DEFAULT_QUALIFIER;

            if (fileConfig.FixForExcel)
            {
                qualifier = "\"";
            }
            var delimiter        = fileConfig.Delimiter?.Length > 0 ? fileConfig.Delimiter[0].ToString() : DEFAULT_DELIMITER;
            var replaceQualifier = qualifier.getReplaceQualifierFunc(
                fileConfig.FixForExcel ? SurroundedQualifierType.Double : fileConfig.SurroundedQualifier, log);

            if (fileConfig.ForceQualifier || fileConfig.FixForExcel)
            {
                return((source, type) =>
                {
                    // String columns consisting only of '0' characters (or being empty) become empty
                    if (type == ColumnType.String && source?.All(x => x == '0') == true)
                    {
                        source = "";
                    }
                    var qualifiedStr = string.Concat(qualifier, replaceQualifier(source) ?? "", qualifier);
                    if (fileConfig.FixForExcel)
                    {
                        // Wrap the already qualified text once more, prefixed with '='
                        return string.Concat(qualifier, "=", replaceQualifier(qualifiedStr), qualifier);
                    }
                    return qualifiedStr;
                });
            }

            return((source, type) =>
            {
                if (type == ColumnType.String && source?.All(x => x == '0') == true)
                {
                    source = "";
                }

                // The check above tolerates null, so must this one: there is nothing to qualify
                if (source == null)
                {
                    return null;
                }

                // Ordinal search: qualifier and delimiter are exact characters, not linguistic text
                if (source.IndexOf(qualifier, StringComparison.Ordinal) > -1 ||
                    source.IndexOf(delimiter, StringComparison.Ordinal) > -1)
                {
                    return string.Concat(qualifier, replaceQualifier(source) ?? "", qualifier);
                }
                return source;
            });
        }
示例#11
0
        /// <summary>
        ///   Initializes a new instance of the <see cref="CsvFileWriter" /> class and prepares the
        ///   qualifier / delimiter texts together with their escaped forms.
        /// </summary>
        /// <param name="file">The file.</param>
        /// <param name="cancellationToken">A cancellation token to stop writing the file</param>
        public CsvFileWriter(ICsvFile file, CancellationToken cancellationToken)
            : base(file, cancellationToken)
        {
            Contract.Requires(file != null);
            m_CsvFile = file;

            var culture = System.Globalization.CultureInfo.CurrentCulture;
            m_FieldQualifier = m_CsvFile.FileFormat.FieldQualifierChar.ToString(culture);
            m_FieldDelimiter = m_CsvFile.FileFormat.FieldDelimiterChar.ToString(culture);

            if (string.IsNullOrEmpty(file.FileFormat.EscapeCharacter))
            {
                // No escape character: line breaks and the delimiter are in the qualify array,
                // the qualifier is doubled and the delimiter is kept as is
                m_QualifyCharArray      = new[] { '\n', '\r', m_CsvFile.FileFormat.FieldDelimiterChar };
                m_FieldQualifierEscaped = new string(m_CsvFile.FileFormat.FieldQualifierChar, 2);
                m_FieldDelimiterEscaped = new string(m_CsvFile.FileFormat.FieldDelimiterChar, 1);
            }
            else
            {
                // With an escape character only line breaks are in the qualify array;
                // qualifier and delimiter are prefixed with the escape character
                m_QualifyCharArray      = new[] { '\n', '\r' };
                m_FieldQualifierEscaped = file.FileFormat.EscapeCharacterChar + m_FieldQualifier;
                m_FieldDelimiterEscaped = file.FileFormat.EscapeCharacterChar + m_FieldDelimiter;
            }
        }
示例#12
0
        /// <summary>
        ///   Re-detects code page, delimiter, start row and header of the file and writes the
        ///   results back to the setting, reporting each step on the process display.
        /// </summary>
        /// <param name="file">The file.</param>
        /// <param name="display">The display; its cancellation token stops the refresh between steps.</param>
        public static void RefreshCsvFile(this ICsvFile file, IProcessDisplay display)
        {
            Contract.Requires(file != null);
            Contract.Requires(display != null);

            var rootFolder = ApplicationSetting.ToolSetting.RootFolder;

            // NOTE(review): the result of GetAbsolutePath is discarded here - confirm whether the
            // call is still needed.
            file.FileName.GetAbsolutePath(rootFolder);

            // Step 1: code page
            display.SetProcess("Checking delimited file");
            GuessCodePage(file);
            if (display.CancellationToken.IsCancellationRequested)
            {
                return;
            }
            var encodingName = EncodingHelper.GetEncodingName(file.CurrentEncoding.CodePage, true, file.ByteOrderMark);
            display.SetProcess("Code Page: " + encodingName);

            // Step 2: delimiter
            file.FileFormat.FieldDelimiter = GuessDelimiter(file);
            if (display.CancellationToken.IsCancellationRequested)
            {
                return;
            }
            display.SetProcess("Delimiter: " + file.FileFormat.FieldDelimiter);

            // Step 3: start row, only reported when rows are actually skipped
            file.SkipRows = GuessStartRow(file);
            if (display.CancellationToken.IsCancellationRequested)
            {
                return;
            }
            if (file.SkipRows > 0)
            {
                var startRowText = file.SkipRows.ToString(CultureInfo.InvariantCulture);
                display.SetProcess("Start Row: " + startRowText);
            }

            // Step 4: header
            file.HasFieldHeader = GuessHasHeader(file, display.CancellationToken);
            display.SetProcess("Column Header: " + file.HasFieldHeader);
        }
示例#13
0
        /// <summary>
        ///   Guesses the code page ID of a file
        /// </summary>
        /// <param name="setting">The CSVFile fileSetting</param>
        /// <remarks>
        ///   Up to 256 kBytes from the start of the file are inspected. The result is stored in
        ///   <c>CodePageId</c> and <c>ByteOrderMark</c> of the setting. A code page reported as
        ///   ASCII (20127) is stored as UTF-8 (65001).
        /// </remarks>
        public static void GuessCodePage(ICsvFile setting)
        {
            Contract.Requires(setting != null);

            // Read 256 kBytes
            var buff = new byte[262144];
            var length = 0;

            using (var fileStream = ImprovedStream.OpenRead(setting))
            {
                // A single Read may return fewer bytes than requested even before the end of the
                // stream, so keep reading until the buffer is full or the stream is exhausted.
                int read;
                while (length < buff.Length &&
                       (read = fileStream.Stream.Read(buff, length, buff.Length - length)) > 0)
                {
                    length += read;
                }
            }

            // A byte order mark needs at least two bytes
            if (length >= 2)
            {
                var byBom = EncodingHelper.GetCodePageByByteOrderMark(buff);
                if (byBom != 0)
                {
                    setting.ByteOrderMark = true;
                    setting.CodePageId    = byBom;
                    return;
                }
            }

            setting.ByteOrderMark = false;
            var detected = EncodingHelper.GuessCodePageNoBom(buff, length);

            // ASCII will be reported as UTF-8, UTF8 includes ASCII as subset
            if (detected == 20127)
            {
                detected = 65001;
            }

            Log.Info("Detected Code Page: " + EncodingHelper.GetEncodingName(detected, true, setting.ByteOrderMark));
            setting.CodePageId = detected;
        }
示例#14
0
        /// <summary>
        ///   Scans the file for a field qualifier that appears at the start of a column, i.e.
        ///   directly (or only separated by whitespace) after a line break or a field delimiter.
        /// </summary>
        /// <param name="setting">The setting.</param>
        /// <param name="token">The token; a requested cancellation ends the scan with <c>false</c>.</param>
        /// <returns>
        ///   <c>true</c> if a qualifier was found at the start of a column; otherwise, <c>false</c>.
        /// </returns>
        public static bool HasUsedQualifier(ICsvFile setting, CancellationToken token)
        {
            Contract.Requires(setting != null);

            // Without a qualifier defined there is nothing to look for
            if (string.IsNullOrEmpty(setting.FileFormat.FieldQualifier) || token.IsCancellationRequested)
            {
                return false;
            }

            using (var improvedStream = ImprovedStream.OpenRead(setting))
            using (var streamReader = new StreamReader(improvedStream.Stream, setting.GetEncoding(), setting.ByteOrderMark))
            {
                var skipped = 0;
                while (skipped < setting.SkipRows)
                {
                    streamReader.ReadLine();
                    skipped++;
                }

                var delimiter = setting.FileFormat.FieldDelimiterChar;
                var qualifier = setting.FileFormat.FieldQualifierChar;

                var chunk = new char[262144];
                var atColumnStart = true;
                while (!streamReader.EndOfStream)
                {
                    var charsRead = streamReader.ReadBlock(chunk, 0, 262143);

                    // Look for Delimiter [Whitespace] Qualifier or StartOfLine [Whitespace] Qualifier
                    for (var pos = 0; pos < charsRead; pos++)
                    {
                        if (token.IsCancellationRequested)
                        {
                            return false;
                        }

                        var ch = chunk[pos];
                        if (ch == '\r' || ch == '\n' || ch == delimiter)
                        {
                            // A line break or delimiter starts a new column
                            atColumnStart = true;
                        }
                        else if (atColumnStart)
                        {
                            // A qualifier at the start of a column is a real qualifier
                            if (ch == qualifier)
                            {
                                return true;
                            }

                            // Only whitespace keeps us at the start of the column
                            atColumnStart = ch <= '\x00ff'
                                ? ch == ' ' || ch == '\t'
                                : CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.SpaceSeparator;
                        }
                    }
                }
            }

            return false;
        }
 /// <summary>
 ///   Creates the converter and keeps the supplied collaborators in its private fields.
 /// </summary>
 /// <param name="csvFile">The CSV file description, stored in <c>_csvfile</c>.</param>
 /// <param name="loggerConverter">The logger converter, stored in <c>_loggerConverter</c>.</param>
 public CommandArgumentConverter(ICsvFile csvFile, ILoggerConverter loggerConverter)
 {
     // Independent assignments
     _loggerConverter = loggerConverter;
     _csvfile = csvFile;
 }
示例#16
0
 /// <summary>
 ///   Creates the instance and stores the file setting and the expected number of columns.
 /// </summary>
 /// <param name="csvFile">The file setting, stored in <c>m_CsvFile</c>.</param>
 /// <param name="excpectedColumns">The expected column count, stored in <c>m_ExcpectedColumns</c>.</param>
 public ReAlignColumns(ICsvFile csvFile, int excpectedColumns)
 {
     // Independent assignments
     m_ExcpectedColumns = excpectedColumns;
     m_CsvFile = csvFile;
 }
示例#17
0
 /// <summary>
 ///   Asynchronously reads the lines of the file and creates an <c>AuthorCsvImport</c> from them.
 /// </summary>
 /// <param name="file">The file whose lines are read through <c>ReadLinesAsync</c>.</param>
 /// <returns>A task producing the new <c>AuthorCsvImport</c>.</returns>
 public static async Task <AuthorCsvImport> BuildAsync(ICsvFile file)
 {
     var lines = await file.ReadLinesAsync();
     return new AuthorCsvImport(lines);
 }
示例#18
0
 /// <summary>
 ///   Initializes a new instance of the <see cref="CsvFileWriter" /> class by forwarding the
 ///   values of the file setting to another constructor of this class.
 /// </summary>
 /// <param name="fileSetting">The file setting whose properties are passed on.</param>
 /// <param name="processDisplay">The process display.</param>
 public CsvFileWriter([NotNull] ICsvFile fileSetting, [CanBeNull] IProcessDisplay processDisplay)
     : this(
         fileSetting.ID,
         fileSetting.FullPath,
         fileSetting.HasFieldHeader,
         fileSetting.FileFormat.ValueFormatMutable,
         fileSetting.FileFormat,
         fileSetting.CodePageId,
         fileSetting.ByteOrderMark,
         fileSetting.ToString(),
         fileSetting.ColumnCollection.ReadonlyCopy(),
         fileSetting.Recipient,
         fileSetting.KeepUnencrypted,
         fileSetting.IdentifierInContainer,
         fileSetting.Header,
         fileSetting.Footer,
         processDisplay)
 {
     // All work is done by the constructor this one chains to
 }
示例#19
0
 /// <summary>
 ///   Creates the converter and keeps the supplied file description in <c>_csvfile</c>.
 /// </summary>
 /// <param name="csvFile">The CSV file description to store.</param>
 public LoggerConverter(ICsvFile csvFile)
 {
     // Keep the file description for later use
     _csvfile = csvFile;
 }