/// <summary>
/// Guesses whether the file is most likely NOT a delimited text file.
/// </summary>
/// <param name="setting"><see cref="ICsvFile" /> with the information</param>
/// <returns><c>true</c> if this is most likely not a delimited file</returns>
public static bool GuessNotADelimitedFile(ICsvFile setting)
{
    Contract.Requires(setting != null);
    using (var improvedStream = ImprovedStream.OpenRead(setting))
    using (var streamReader = new StreamReader(improvedStream.Stream, setting.GetEncoding(), setting.ByteOrderMark))
    {
        // Ignore the leading rows the setting asks to skip
        for (int i = 0; i < setting.SkipRows; i++)
        {
            streamReader.ReadLine();
        }

        // Count candidate separators; '\0' is passed as the second argument
        // (presumably "no escape character") and 300 presumably limits the rows inspected — TODO confirm
        var dc = GetDelimiterCounter(streamReader, '\0', 300);

        // A separator that shows up in at least 90% of the rows is a proper delimiter
        for (var sep = 0; sep < dc.Separators.Length; sep++)
        {
            if (dc.SeparatorRows[sep] >= dc.LastRow * 9 / 10)
            {
                // The log text now matches the result: false means "it is a delimited file"
                Log.Info("Delimited file");
                return false;
            }
        }
    }

    // No separator was found consistently enough
    Log.Info("Not a delimited file");
    return true;
}
/// <summary>
/// Creates the adapter and stores the supplied collaborators for later use.
/// </summary>
/// <param name="csvFile">The CSV file description.</param>
/// <param name="parser">The parser used for the CSV file.</param>
/// <param name="dataTypeDetector">The detector used to determine data types.</param>
public CsvFileDataAdapter(
    ICsvFile csvFile,
    ICsvFileParser parser,
    IDataTypeDetector dataTypeDetector)
{
    this._csvFile = csvFile;
    this._parser = parser;
    this._dataTypeDetector = dataTypeDetector;
}
/// <summary>
/// Creates the form for the given file setting: binds the setting and its file format
/// to the binding sources, opens a stream on the file and refreshes the highlighting.
/// </summary>
/// <param name="csvFile">The file setting shown and edited by this form.</param>
public FindSkipRows(ICsvFile csvFile)
{
    InitializeComponent();

    // Keep the setting and hook it up to the data bindings
    fileSetting = csvFile;
    fileSettingBindingSource.DataSource = csvFile;
    fileFormatBindingSource.DataSource = csvFile.FileFormat;

    // Open the source; the stream is kept in a field for later use
    var sourceAccess = new SourceAccess(csvFile);
    m_Stream = new ImprovedStream(sourceAccess);

    UpdateHighlight();
}
/// <summary>
/// Opens the file described by the setting and determines the row the data starts in.
/// </summary>
/// <param name="setting"><see cref="ICsvFile" /> with the information</param>
/// <returns>
/// The number of rows to skip
/// </returns>
public static int GuessStartRow(ICsvFile setting)
{
    Contract.Requires(setting != null);

    using (var source = ImprovedStream.OpenRead(setting))
    {
        using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
        {
            // Delegate to the reader based overload using the configured delimiter and qualifier
            var delimiter = setting.FileFormat.FieldDelimiterChar;
            var qualifier = setting.FileFormat.FieldQualifierChar;
            var startRow = GuessStartRow(reader, delimiter, qualifier);
            return startRow;
        }
    }
}
/// <summary>
/// Gets the <see cref="Encoding"/> matching the code page of the setting.
/// If the code page ID is negative it is guessed from the file first.
/// </summary>
/// <param name="setting">The setting.</param>
/// <returns>The encoding for <c>setting.CodePageId</c></returns>
public static Encoding GetEncoding(this ICsvFile setting)
{
    Contract.Requires(setting != null);

    // A negative ID means the code page has not been determined yet
    var needsDetection = setting.CodePageId < 0;
    if (needsDetection)
    {
        GuessCodePage(setting);
    }

    var codePage = setting.CodePageId;
    return Encoding.GetEncoding(codePage);
}
/// <summary>
/// Opens the file, skips the configured number of rows and tries to guess the new line sequence.
/// </summary>
/// <param name="setting"><see cref="ICsvFile" /> with the information</param>
/// <returns>The NewLine Combination used</returns>
public static string GuessNewline(ICsvFile setting)
{
    Contract.Requires(setting != null);

    using (var source = ImprovedStream.OpenRead(setting))
    {
        using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
        {
            // Move past the rows that should be ignored
            var skipped = 0;
            while (skipped < setting.SkipRows)
            {
                reader.ReadLine();
                skipped++;
            }

            var qualifier = setting.FileFormat.FieldQualifierChar;
            return GuessNewline(reader, qualifier);
        }
    }
}
/// <summary>
/// Opens the csv file, and checks whether the names read for the columns look like a real header.
/// </summary>
/// <param name="setting">The CSVFile fileSetting</param>
/// <param name="cancellationToken">Token passed on when opening the reader.</param>
/// <returns>
/// <c>True</c> we could use the first row as header, <c>false</c> should not use first row as header
/// </returns>
public static bool GuessHasHeader(ICsvFile setting, CancellationToken cancellationToken)
{
    Contract.Requires(setting != null);

    // Nothing to check when the setting already states there is no header
    if (!setting.HasFieldHeader)
    {
        Log.Info("Without Header Row");
        return false;
    }

    using (var reader = new CsvFileReader(setting))
    {
        reader.Open(false, cancellationToken);

        var defaultNameCount = 0;
        var index = 0;
        while (index < reader.FieldCount)
        {
            var name = reader.GetName(index);

            // A column that carries the generated default name had no usable header text
            var isDefaultName = name.Equals(BaseFileReader.GetDefaultName(index), StringComparison.OrdinalIgnoreCase);
            if (isDefaultName)
            {
                // The comparison uses the count BEFORE this column is added.
                // NOTE(review): with a single column this threshold can never be reached — confirm intent
                var seenBefore = defaultNameCount;
                defaultNameCount = seenBefore + 1;
                if (seenBefore == (int)Math.Ceiling(reader.FieldCount / 2.0))
                {
                    Log.Info("Without Header Row");
                    return false;
                }
            }

            // A name that is a number, or is longer than 80 characters, is taken as data
            var looksLikeNumber = StringConversion.StringToDecimal(name, '.', ',', false).HasValue;
            if (looksLikeNumber || name.Length > 80)
            {
                Log.Info("Without Header Row");
                return false;
            }

            index++;
        }

        Log.Info("With Header Row");
        return true;
    }
}
/// <summary>
/// Guesses the delimiter for a file. Done with a rather simple csv parsing, trying to find
/// the delimiter that has the least variance in the read rows; if that is not possible the
/// delimiter with the highest number of occurrences.
/// </summary>
/// <param name="setting">The CSVFile fileSetting</param>
/// <returns>
/// A character with the assumed delimiter for the file
/// </returns>
/// <remarks>
/// NOTE(review): the earlier remark about errors not being thrown was ambiguous; this method
/// contains no exception handling of its own — confirm the intended contract.
/// </remarks>
public static string GuessDelimiter(ICsvFile setting)
{
    Contract.Requires(setting != null);
    Contract.Ensures(Contract.Result<string>() != null);

    using (var source = ImprovedStream.OpenRead(setting))
    {
        using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
        {
            // Move past the rows that should be ignored
            var skipped = 0;
            while (skipped < setting.SkipRows)
            {
                reader.ReadLine();
                skipped++;
            }

            var escapeCharacter = setting.FileFormat.EscapeCharacterChar;
            return GuessDelimiter(reader, escapeCharacter);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvFileReader" /> class and validates
/// the file name, the field delimiter, the escape character and the field qualifier of the setting.
/// </summary>
/// <param name="fileSetting">The setting describing the delimited file.</param>
/// <exception cref="ApplicationException">
/// The file name is empty, or the delimiter, escape character or qualifier is invalid.
/// </exception>
/// <exception cref="FileNotFoundException">The local file does not exist or is not accessible.</exception>
/// <exception cref="ArgumentOutOfRangeException">The qualifier and the delimiter are the same character.</exception>
public CsvFileReader(ICsvFile fileSetting) : base(fileSetting)
{
    m_CsvFile = fileSetting;
    if (string.IsNullOrEmpty(m_CsvFile.FileName))
    {
        throw new ApplicationException("FileName must be set");
    }

    // m_CsvFile has been dereferenced above, a null-conditional access is not needed here
    if (ApplicationSetting.RemoteFileHandler != null && string.IsNullOrEmpty(m_CsvFile.RemoteFileName))
    {
        if (!FileSystemUtils.FileExists(m_CsvFile.FullPath))
        {
            throw new FileNotFoundException(
                $"The file '{FileSystemUtils.GetShortDisplayFileName(m_CsvFile.FileName, 80)}' does not exist or is not accessible.",
                m_CsvFile.FileName);
        }
    }

    // CR, LF, space and the NUL character can not be used to separate fields
    if (m_CsvFile.FileFormat.FieldDelimiterChar == c_Cr ||
        m_CsvFile.FileFormat.FieldDelimiterChar == c_Lf ||
        m_CsvFile.FileFormat.FieldDelimiterChar == ' ' ||
        m_CsvFile.FileFormat.FieldDelimiterChar == '\0')
    {
        throw new ApplicationException(
            "The field delimiter character is invalid, please use something else than CR, LF or Space");
    }

    if (m_CsvFile.FileFormat.FieldDelimiterChar == m_CsvFile.FileFormat.EscapeCharacterChar)
    {
        throw new ApplicationException(
            $"The escape character is invalid, please use something else than the field delimiter character {FileFormat.GetDescription(m_CsvFile.FileFormat.EscapeCharacter)}.");
    }

    // The remaining checks only matter when a qualifier is in use
    m_HasQualifier |= m_CsvFile.FileFormat.FieldQualifierChar != '\0';
    if (!m_HasQualifier)
    {
        return;
    }

    if (m_CsvFile.FileFormat.FieldQualifierChar == m_CsvFile.FileFormat.FieldDelimiterChar)
    {
        // The single-string constructor would treat the text as the parameter name,
        // so pass the parameter name and the message separately
        throw new ArgumentOutOfRangeException(
            nameof(fileSetting),
            $"The text quoting and the field delimiter characters of a delimited file cannot be the same. {m_CsvFile.FileFormat.FieldDelimiterChar}");
    }

    if (m_CsvFile.FileFormat.FieldQualifierChar == c_Cr || m_CsvFile.FileFormat.FieldQualifierChar == c_Lf)
    {
        throw new ApplicationException(
            "The text quoting characters is invalid, please use something else than CR or LF");
    }
}
/// <summary>
/// Builds the function that turns a raw field value into the text written to the file,
/// surrounding it with the qualifier where required.
/// </summary>
/// <param name="fileConfig">The file configuration providing qualifier, delimiter and quoting options.</param>
/// <param name="log">The log handed to the qualifier replacement function.</param>
/// <returns>
/// A function taking the source text and its column type. With <c>ForceQualifier</c> or
/// <c>FixForExcel</c> every value is qualified; otherwise only values containing the qualifier
/// or the delimiter are. In the latter mode a <c>null</c> source is returned unchanged.
/// </returns>
private static Func<string, ColumnType, string> GetQualifiedStrFunc(this ICsvFile fileConfig, ILog log)
{
    // Only the first character of the configured qualifier / delimiter is used
    var qualifier = fileConfig.Qualifier?.Length > 0 ? fileConfig.Qualifier[0].ToString() : DEFAULT_QUALIFIER;
    if (fileConfig.FixForExcel)
    {
        qualifier = "\"";
    }

    var delimiter = fileConfig.Delimiter?.Length > 0 ? fileConfig.Delimiter[0].ToString() : DEFAULT_DELIMITER;
    var replaceQualifier = qualifier.getReplaceQualifierFunc(
        fileConfig.FixForExcel ? SurroundedQualifierType.Double : fileConfig.SurroundedQualifier,
        log);

    if (fileConfig.ForceQualifier || fileConfig.FixForExcel)
    {
        return (source, type) =>
        {
            // A string column consisting only of '0' characters is written as empty text
            if (type == ColumnType.String && source?.All(x => x == '0') == true)
            {
                source = "";
            }

            var qualifiedStr = string.Concat(qualifier, replaceQualifier(source) ?? "", qualifier);
            if (fileConfig.FixForExcel)
            {
                // Wrap the already qualified text once more, prefixed with '='
                return string.Concat(qualifier, "=", replaceQualifier(qualifiedStr), qualifier);
            }

            return qualifiedStr;
        };
    }

    return (source, type) =>
    {
        if (type == ColumnType.String && source?.All(x => x == '0') == true)
        {
            source = "";
        }

        // Nothing to inspect or qualify; avoids a NullReferenceException on the lookups below
        if (source == null)
        {
            return source;
        }

        // Ordinal search: qualifier and delimiter are exact characters, not linguistic text
        if (source.IndexOf(qualifier, StringComparison.Ordinal) > -1 ||
            source.IndexOf(delimiter, StringComparison.Ordinal) > -1)
        {
            return string.Concat(qualifier, replaceQualifier(source) ?? "", qualifier);
        }

        return source;
    };
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvFileWriter" /> class and prepares the
/// qualifier, delimiter and their escaped forms from the file format.
/// </summary>
/// <param name="file">The file.</param>
/// <param name="cancellationToken">A cancellation token to stop writing the file</param>
public CsvFileWriter(ICsvFile file, CancellationToken cancellationToken)
    : base(file, cancellationToken)
{
    Contract.Requires(file != null);
    m_CsvFile = file;

    var culture = System.Globalization.CultureInfo.CurrentCulture;
    var format = m_CsvFile.FileFormat;
    m_FieldQualifier = format.FieldQualifierChar.ToString(culture);
    m_FieldDelimiter = format.FieldDelimiterChar.ToString(culture);

    var hasEscapeCharacter = !string.IsNullOrEmpty(format.EscapeCharacter);
    if (hasEscapeCharacter)
    {
        // With an escape character: qualifier and delimiter are escaped by prefixing it
        var escape = format.EscapeCharacterChar;
        m_QualifyCharArray = new[] { '\n', '\r' };
        m_FieldQualifierEscaped = escape + m_FieldQualifier;
        m_FieldDelimiterEscaped = escape + m_FieldDelimiter;
    }
    else
    {
        // Without an escape character: the qualifier is doubled, the delimiter stays as it is
        m_QualifyCharArray = new[] { '\n', '\r', format.FieldDelimiterChar };
        m_FieldQualifierEscaped = new string(format.FieldQualifierChar, 2);
        m_FieldDelimiterEscaped = new string(format.FieldDelimiterChar, 1);
    }
}
/// <summary>
/// Refreshes the settings assuming the file has changed: guesses code page, delimiter,
/// start row and header in that order, reporting each step and stopping when cancellation is requested.
/// </summary>
/// <param name="file">The file.</param>
/// <param name="display">The display used for progress messages and the cancellation token.</param>
public static void RefreshCsvFile(this ICsvFile file, IProcessDisplay display)
{
    Contract.Requires(file != null);
    Contract.Requires(display != null);

    // NOTE(review): the result of GetAbsolutePath is not used — confirm whether it should be
    var rootFolder = ApplicationSetting.ToolSetting.RootFolder;
    file.FileName.GetAbsolutePath(rootFolder);

    display.SetProcess("Checking delimited file");

    // Step 1: code page
    GuessCodePage(file);
    if (display.CancellationToken.IsCancellationRequested)
    {
        return;
    }

    var encodingName = EncodingHelper.GetEncodingName(file.CurrentEncoding.CodePage, true, file.ByteOrderMark);
    display.SetProcess("Code Page: " + encodingName);

    // Step 2: delimiter
    file.FileFormat.FieldDelimiter = GuessDelimiter(file);
    if (display.CancellationToken.IsCancellationRequested)
    {
        return;
    }

    display.SetProcess("Delimiter: " + file.FileFormat.FieldDelimiter);

    // Step 3: start row, only reported when rows are skipped
    var startRow = GuessStartRow(file);
    file.SkipRows = startRow;
    if (display.CancellationToken.IsCancellationRequested)
    {
        return;
    }

    if (file.SkipRows > 0)
    {
        display.SetProcess("Start Row: " + file.SkipRows.ToString(CultureInfo.InvariantCulture));
    }

    // Step 4: header
    var hasHeader = GuessHasHeader(file, display.CancellationToken);
    file.HasFieldHeader = hasHeader;
    display.SetProcess("Column Header: " + file.HasFieldHeader);
}
/// <summary>
/// Guesses the code page ID of a file from the bytes at its start and stores the result in the setting.
/// </summary>
/// <param name="setting">The CSVFile fileSetting</param>
/// <remarks>
/// The CodePage and the BOM flag of the setting will be set.
/// NOTE(review): this method has no exception handling of its own — confirm whether callers may see errors.
/// </remarks>
public static void GuessCodePage(ICsvFile setting)
{
    Contract.Requires(setting != null);

    // Buffer of 256 kBytes for the sample taken from the start of the file
    var sample = new byte[262144];
    int bytesRead;
    using (var source = ImprovedStream.OpenRead(setting))
    {
        bytesRead = source.Stream.Read(sample, 0, sample.Length);
    }

    // A byte order mark, if present, decides the code page
    if (bytesRead >= 2)
    {
        var bomCodePage = EncodingHelper.GetCodePageByByteOrderMark(sample);
        if (bomCodePage != 0)
        {
            setting.ByteOrderMark = true;
            setting.CodePageId = bomCodePage;
            return;
        }
    }

    setting.ByteOrderMark = false;
    var codePage = EncodingHelper.GuessCodePageNoBom(sample, bytesRead);

    // ASCII will be reported as UTF-8, UTF8 includes ASCII as subset
    if (codePage == 20127)
    {
        codePage = 65001;
    }

    var encodingName = EncodingHelper.GetEncodingName(codePage, true, setting.ByteOrderMark);
    Log.Info("Detected Code Page: " + encodingName);
    setting.CodePageId = codePage;
}
/// <summary>
/// Checks if the field qualifier is actually used in the file, meaning it is found at the
/// start of a column (optionally preceded by white space).
/// </summary>
/// <param name="setting">The setting.</param>
/// <param name="token">The token; when cancellation is requested <c>false</c> is returned.</param>
/// <returns>
/// <c>true</c> if a qualifier was found at the start of a column; otherwise, <c>false</c>.
/// </returns>
public static bool HasUsedQualifier(ICsvFile setting, CancellationToken token)
{
    Contract.Requires(setting != null);

    // Without a qualifier defined there is nothing to look for
    if (string.IsNullOrEmpty(setting.FileFormat.FieldQualifier) || token.IsCancellationRequested)
    {
        return false;
    }

    using (var source = ImprovedStream.OpenRead(setting))
    {
        using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
        {
            // Move past the rows that should be ignored
            var skipped = 0;
            while (skipped < setting.SkipRows)
            {
                reader.ReadLine();
                skipped++;
            }

            var delimiter = setting.FileFormat.FieldDelimiterChar;
            var qualifier = setting.FileFormat.FieldQualifierChar;
            var block = new char[262144];
            var atColumnStart = true;

            while (!reader.EndOfStream)
            {
                var count = reader.ReadBlock(block, 0, 262143);

                // Look for Delimiter [Whitespace] Qualifier or StartOfLine [Whitespace] Qualifier
                for (var position = 0; position < count; position++)
                {
                    if (token.IsCancellationRequested)
                    {
                        return false;
                    }

                    var ch = block[position];
                    var endsColumn = ch == '\r' || ch == '\n' || ch == delimiter;
                    if (endsColumn)
                    {
                        // The next character starts a new column
                        atColumnStart = true;
                    }
                    else if (atColumnStart)
                    {
                        // A qualifier at the start of a column is a real qualifier, we can stop
                        if (ch == qualifier)
                        {
                            return true;
                        }

                        // Only white space keeps us at the start of the column
                        atColumnStart = ch <= '\x00ff'
                            ? (ch == ' ' || ch == '\t')
                            : CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.SpaceSeparator;
                    }
                }
            }
        }
    }

    return false;
}
/// <summary>
/// Creates the converter and stores the CSV file and the logger converter for later use.
/// </summary>
/// <param name="csvFile">The CSV file description.</param>
/// <param name="loggerConverter">The logger converter.</param>
public CommandArgumentConverter(ICsvFile csvFile, ILoggerConverter loggerConverter)
{
    this._csvfile = csvFile;
    this._loggerConverter = loggerConverter;
}
/// <summary>
/// Creates the instance and stores the file setting together with the expected number of columns.
/// </summary>
/// <param name="csvFile">The CSV file setting.</param>
/// <param name="excpectedColumns">The expected number of columns.</param>
public ReAlignColumns(ICsvFile csvFile, int excpectedColumns)
{
    this.m_CsvFile = csvFile;
    this.m_ExcpectedColumns = excpectedColumns;
}
/// <summary>
/// Reads the lines of the file asynchronously and creates an <see cref="AuthorCsvImport" /> from them.
/// </summary>
/// <param name="file">The file to read the lines from.</param>
/// <returns>A task producing the import created from the lines read.</returns>
public static async Task<AuthorCsvImport> BuildAsync(ICsvFile file)
{
    var lines = await file.ReadLinesAsync();
    return new AuthorCsvImport(lines);
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvFileWriter" /> class from a file setting,
/// passing the individual values of the setting on to another constructor of this class.
/// </summary>
/// <param name="fileSetting">The file setting whose values are passed on.</param>
/// <param name="processDisplay">The process display; annotated as allowed to be <c>null</c>.</param>
public CsvFileWriter([NotNull] ICsvFile fileSetting, [CanBeNull] IProcessDisplay processDisplay) : this(fileSetting.ID, fileSetting.FullPath, fileSetting.HasFieldHeader, fileSetting.FileFormat.ValueFormatMutable, fileSetting.FileFormat, fileSetting.CodePageId, fileSetting.ByteOrderMark, fileSetting.ToString(), fileSetting.ColumnCollection.ReadonlyCopy(), fileSetting.Recipient, fileSetting.KeepUnencrypted, fileSetting.IdentifierInContainer, fileSetting.Header, fileSetting.Footer, processDisplay) { }
/// <summary>
/// Creates the converter and stores the CSV file for later use.
/// </summary>
/// <param name="csvFile">The CSV file description.</param>
public LoggerConverter(ICsvFile csvFile)
{
    this._csvfile = csvFile;
}