Beispiel #1
0
        /// <summary>
        ///   Guesses whether the file described by <paramref name="setting" /> is most likely NOT a
        ///   delimited text file.
        /// </summary>
        /// <param name="setting"><see cref="ICsvFile" /> supplying the file, its encoding, the byte order mark flag and the number of rows to skip</param>
        /// <returns>
        ///   <c>false</c> if at least one candidate separator qualifies as a proper delimiter (the file
        ///   is delimited); <c>true</c> if no separator qualifies (most likely not a delimited file)
        /// </returns>
        public static bool GuessNotADelimitedFile(ICsvFile setting)
        {
            Contract.Requires(setting != null);
            using (var improvedStream = ImprovedStream.OpenRead(setting))
            using (var streamReader = new StreamReader(improvedStream.Stream, setting.GetEncoding(), setting.ByteOrderMark))
            {
                // Ignore the leading rows the setting asks to skip
                for (var i = 0; i < setting.SkipRows; i++)
                {
                    streamReader.ReadLine();
                }

                // Count the candidate separators in the following rows
                // NOTE(review): 300 is presumably the maximum number of rows sampled - confirm against GetDelimiterCounter
                var dc = GetDelimiterCounter(streamReader, '\0', 300);

                // A separator whose row count reaches 90% (integer arithmetic) of the last row
                // is treated as a proper delimiter
                for (var sep = 0; sep < dc.Separators.Length; sep++)
                {
                    if (dc.SeparatorRows[sep] >= dc.LastRow * 9 / 10)
                    {
                        // A proper delimiter exists, so the log has to state that the file IS delimited
                        Log.Info("Delimited file");
                        return false;
                    }
                }
            }

            // No candidate separator qualified
            Log.Info("Not a delimited file");
            return true;
        }
Beispiel #2
0
        /// <summary>
        ///   Creates an <see cref="ImprovedStream" /> that writes to the given file.
        /// </summary>
        /// <param name="path">The path of the file to write.</param>
        /// <param name="processDisplay">Optional process display; stored in the <c>ProcessDisplay</c> property of the returned stream.</param>
        /// <param name="recipient">Optional recipient; stored in the <c>Recipient</c> property of the returned stream.</param>
        /// <returns>
        ///   An <see cref="ImprovedStream" /> whose <c>BaseStream</c> and <c>Stream</c> both refer to the newly created file stream.
        /// </returns>
        /// <remarks>
        ///   If the path is assumed to be PGP or GZip (<c>AssumePgp</c> / <c>AssumeGZip</c>) the data is
        ///   written to a newly created temporary file whose name is kept in <c>TempFile</c>. Otherwise
        ///   <c>FileSystemUtils.FileDelete</c> is called for the target path and the file is created directly.
        /// </remarks>
        public static ImprovedStream OpenWrite(string path, IProcessDisplay processDisplay = null, string recipient = null)
        {
            var result = new ImprovedStream();
            result.WritePath = path.RemovePrefix();
            result.ProcessDisplay = processDisplay;
            result.Recipient = recipient;

            var writeViaTempFile = path.AssumePgp() || path.AssumeGZip();
            if (!writeViaTempFile)
            {
                // Plain file: remove a possibly existing file and write to the target directly
                var target = path.LongPathPrefix();
                FileSystemUtils.FileDelete(target);
                result.BaseStream = File.Create(target);
            }
            else
            {
                // The content goes to a temporary file first
                Log.Debug("Creating temporary file");
                result.TempFile = Path.GetTempFileName();
                result.BaseStream = File.Create(result.TempFile);
            }

            // Without any wrapping the exposed stream is the base stream
            result.Stream = result.BaseStream;
            return result;
        }
Beispiel #3
0
 /// <summary>
 ///   Opens the file described by <paramref name="setting" /> and determines the row the data starts in.
 /// </summary>
 /// <param name="setting"><see cref="ICsvFile" /> supplying the file, its encoding, the byte order mark flag, the field delimiter and the field qualifier</param>
 /// <returns>
 ///   The number of rows to skip, as returned by the reader based overload
 /// </returns>
 public static int GuessStartRow(ICsvFile setting)
 {
     Contract.Requires(setting != null);

     using (var source = ImprovedStream.OpenRead(setting))
     {
         using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
         {
             // The actual detection is done by the overload working on the text reader
             var delimiter = setting.FileFormat.FieldDelimiterChar;
             var qualifier = setting.FileFormat.FieldQualifierChar;
             return GuessStartRow(reader, delimiter, qualifier);
         }
     }
 }
Beispiel #4
0
 /// <summary>
 ///   Opens the file described by <paramref name="setting" />, skips the configured number of rows
 ///   and tries to guess the new line sequence used.
 /// </summary>
 /// <param name="setting"><see cref="ICsvFile" /> supplying the file, its encoding, the byte order mark flag, the rows to skip and the field qualifier</param>
 /// <returns>The new line combination, as returned by the reader based overload</returns>
 public static string GuessNewline(ICsvFile setting)
 {
     Contract.Requires(setting != null);

     using (var source = ImprovedStream.OpenRead(setting))
     {
         using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
         {
             // Move past the rows that should be ignored
             var skipped = 0;
             while (skipped < setting.SkipRows)
             {
                 reader.ReadLine();
                 skipped++;
             }

             // The actual detection is done by the overload working on the text reader
             var qualifier = setting.FileFormat.FieldQualifierChar;
             return GuessNewline(reader, qualifier);
         }
     }
 }
Beispiel #5
0
 /// <summary>
 ///   Guesses the delimiter of a file. Done with a rather simple csv parsing, trying to find
 ///   the delimiter that has the least variance in the read rows; if that is not possible the
 ///   delimiter with the highest number of occurrences is used.
 /// </summary>
 /// <param name="setting">The <see cref="ICsvFile" /> supplying the file, its encoding, the byte order mark flag, the rows to skip and the escape character</param>
 /// <returns>
 ///   A string with the assumed delimiter for the file, as returned by the reader based overload
 /// </returns>
 /// <remarks>
 ///   This method does not catch exceptions itself.
 ///   NOTE(review): the original remark suggested that no error is thrown - confirm whether the
 ///   reader based overload suppresses errors.
 /// </remarks>
 public static string GuessDelimiter(ICsvFile setting)
 {
     Contract.Requires(setting != null);
     Contract.Ensures(Contract.Result<string>() != null);

     using (var source = ImprovedStream.OpenRead(setting))
     {
         using (var reader = new StreamReader(source.Stream, setting.GetEncoding(), setting.ByteOrderMark))
         {
             // Move past the rows that should be ignored
             var skipped = 0;
             while (skipped < setting.SkipRows)
             {
                 reader.ReadLine();
                 skipped++;
             }

             // The actual detection is done by the overload working on the text reader
             var escapeCharacter = setting.FileFormat.EscapeCharacterChar;
             return GuessDelimiter(reader, escapeCharacter);
         }
     }
 }
Beispiel #6
0
        /// <summary>
        ///   Shows up to 32000 characters of the file in the text box, starting at the position
        ///   taken from the vertical scroll bar. If the position is not the start of the file, the
        ///   text up to and including the next line break is dropped so the display begins with a
        ///   new line. Any exception is shown in the text box instead of the file content.
        /// </summary>
        private void UpdateView()
        {
            const int carriageReturn = 13;
            const int lineFeed = 10;

            m_DisplayedAt = ScrollBarVertical.Value;
            if (string.IsNullOrEmpty(m_CsvFile.FileName))
            {
                return;
            }

            try
            {
                using (var procDisp = new ProcessDisplayTime(System.Threading.CancellationToken.None))
                {
                    using (var source = ImprovedStream.OpenRead(m_CsvFile))
                    {
                        using (var reader = new StreamReader(source.Stream, m_CsvFile.GetEncoding(), m_CsvFile.ByteOrderMark))
                        {
                            if (!source.Stream.CanSeek)
                            {
                                // Without seeking only the start of the file can be shown
                                ScrollBarVertical.Enabled = false;
                            }
                            else
                            {
                                source.Stream.Seek(m_DisplayedAt, SeekOrigin.Begin);
                                if (m_DisplayedAt != 0)
                                {
                                    // Consume characters until a line break was read or the stream ended
                                    int current;
                                    do
                                    {
                                        current = reader.Read();
                                    }
                                    while (current != carriageReturn && current != lineFeed && !reader.EndOfStream);

                                    // A two character line break (CR LF or LF CR) needs its second character consumed as well
                                    var following = reader.Peek();
                                    var isCrLf = current == carriageReturn && following == lineFeed;
                                    var isLfCr = current == lineFeed && following == carriageReturn;
                                    if (isCrLf || isLfCr)
                                    {
                                        reader.Read();
                                    }
                                }
                            }

                            var characters = new char[32000];
                            var count = reader.Read(characters, 0, characters.Length);
                            CSVTextBox.Text = new string(characters, 0, count);
                        }
                    }
                }
            }
            catch (Exception exc)
            {
                CSVTextBox.Text = exc.ExceptionMessages();
            }
        }
Beispiel #7
0
        /// <summary>
        ///   Resets the stream, the text reader and the buffer and line counters to the beginning of
        ///   the file, then reads past the number of rows configured in <c>SkipRows</c>.
        /// </summary>
        private void ResetPositionToStart()
        {
            if (m_ImprovedStream == null)
            {
                m_ImprovedStream = ImprovedStream.OpenRead(m_CsvFile);
            }

            m_ImprovedStream.ResetToStart((Stream stream) =>
            {
                // An existing reader on a seekable stream only needs its buffer discarded
                if (stream.CanSeek && m_TextReader != null)
                {
                    m_TextReader.DiscardBufferedData();
                    return;
                }

                // Otherwise a new reader is required, a previous one is disposed first
                if (m_TextReader != null)
                {
                    m_TextReader.Dispose();
                }

                m_TextReader = new StreamReader(stream, m_CsvFile.GetEncoding(), m_CsvFile.ByteOrderMark);
            });

            m_CsvFile.CurrentEncoding = m_TextReader.CurrentEncoding;

            // Nothing is buffered any more
            m_BufferPos = 0;
            m_BufferFilled = 0;

            // The end line starts at 1, as a line is read the start line is set to this value
            EndLineNumber = 1;
            RecordNumber = 0;
            m_EndOfLine = false;
            EndOfFile = false;

            // Skip the given number of lines; the loop runs while EndLineNumber is less than or
            // equal to SkipRows so that exactly that many lines are consumed
            while (!(EndLineNumber > m_CsvFile.SkipRows || EndOfFile || CancellationToken.IsCancellationRequested))
            {
                ReadToEOL();
            }
        }
Beispiel #8
0
        /// <summary>
        ///   Guesses the code page ID of a file by inspecting up to the first 256 kBytes.
        /// </summary>
        /// <param name="setting">The <see cref="ICsvFile" /> to open; its <c>ByteOrderMark</c> and <c>CodePageId</c> are set</param>
        /// <remarks>
        ///   If a byte order mark identifies the code page, <c>ByteOrderMark</c> is set to <c>true</c>;
        ///   otherwise it is set to <c>false</c> and the code page is guessed from the content. This
        ///   method does not catch exceptions itself.
        /// </remarks>
        public static void GuessCodePage(ICsvFile setting)
        {
            Contract.Requires(setting != null);

            // Code page reported for plain ASCII and the UTF-8 code page it is replaced with
            const int asciiCodePage = 20127;
            const int utf8CodePage = 65001;

            // Read up to 256 kBytes
            var buff = new byte[262144];
            var length = 0;

            using (var fileStream = ImprovedStream.OpenRead(setting))
            {
                var stream = fileStream.Stream;

                // A single Stream.Read call may return fewer bytes than requested even though more
                // data follows, so keep reading until the buffer is full or the end of the stream is reached
                while (length < buff.Length)
                {
                    var read = stream.Read(buff, length, buff.Length - length);
                    if (read <= 0)
                    {
                        break;
                    }

                    length += read;
                }
            }

            // A byte order mark takes precedence over guessing from the content
            if (length >= 2)
            {
                var byBom = EncodingHelper.GetCodePageByByteOrderMark(buff);
                if (byBom != 0)
                {
                    setting.ByteOrderMark = true;
                    setting.CodePageId = byBom;
                    return;
                }
            }

            setting.ByteOrderMark = false;
            var detected = EncodingHelper.GuessCodePageNoBom(buff, length);

            // ASCII will be reported as UTF-8, UTF-8 includes ASCII as a subset
            if (detected == asciiCodePage)
            {
                detected = utf8CodePage;
            }

            Log.Info("Detected Code Page: " + EncodingHelper.GetEncodingName(detected, true, setting.ByteOrderMark));
            setting.CodePageId = detected;
        }
Beispiel #9
0
        /// <summary>
        ///   Creates an <see cref="ImprovedStream" /> for the given path and opens the file for
        ///   reading as its base stream.
        /// </summary>
        /// <param name="path">The path of the file to read.</param>
        /// <param name="encryptedPassphraseFunc">
        ///   Function providing the encrypted passphrase; only invoked if the path is assumed to be
        ///   PGP and the function is not <c>null</c>.
        /// </param>
        /// <returns>
        ///   An improved stream where <c>BasePath</c> and <c>BaseStream</c> are set
        /// </returns>
        /// <remarks>
        ///   If setting the path or opening the file fails, the improved stream is closed and the
        ///   exception is rethrown.
        /// </remarks>
        private static ImprovedStream OpenBaseStream(string path, Func<string> encryptedPassphraseFunc)
        {
            var result = new ImprovedStream();
            result.AssumePGP = path.AssumePgp();
            result.AssumeGZip = path.AssumeGZip();

            // The passphrase is only requested when it could be needed
            var requestPassphrase = result.AssumePGP && encryptedPassphraseFunc != null;
            if (requestPassphrase)
            {
                result.EncryptedPassphrase = encryptedPassphraseFunc();
            }

            try
            {
                result.BasePath = path;
                result.BaseStream = File.OpenRead(path);
            }
            catch (Exception)
            {
                // Release whatever the improved stream holds before passing the error on
                result.Close();
                throw;
            }

            return result;
        }
Beispiel #10
0
        /// <summary>
        ///   Checks if quoting was actually used in the file: looks for a field qualifier that is the
        ///   first character of a column, optionally preceded by white space.
        /// </summary>
        /// <param name="setting">The <see cref="ICsvFile" /> supplying the file, its encoding, the rows to skip, the field delimiter and the field qualifier.</param>
        /// <param name="token">Cancellation token; a requested cancellation ends the check with <c>false</c>.</param>
        /// <returns>
        ///   <c>true</c> if a qualifier was found at the start of a column; <c>false</c> if none was
        ///   found, no qualifier is defined or the operation was cancelled.
        /// </returns>
        public static bool HasUsedQualifier(ICsvFile setting, CancellationToken token)
        {
            Contract.Requires(setting != null);

            // If we do not have a quote defined it does not matter
            if (string.IsNullOrEmpty(setting.FileFormat.FieldQualifier) || token.IsCancellationRequested)
            {
                return false;
            }

            using (var improvedStream = ImprovedStream.OpenRead(setting))
            using (var streamReader = new StreamReader(improvedStream.Stream, setting.GetEncoding(), setting.ByteOrderMark))
            {
                for (var i = 0; i < setting.SkipRows; i++)
                {
                    streamReader.ReadLine();
                }

                // Looked up once, these do not change while scanning the file
                var delimiter = setting.FileFormat.FieldDelimiterChar;
                var qualifier = setting.FileFormat.FieldQualifierChar;

                var buff = new char[262144];

                // Carried across blocks, a column start may fall on a block boundary
                var isStartOfColumn = true;
                while (!streamReader.EndOfStream)
                {
                    var read = streamReader.ReadBlock(buff, 0, buff.Length);

                    // Look for Delimiter [Whitespace] Qualifier or StartOfLine [Whitespace] Qualifier
                    for (var current = 0; current < read; current++)
                    {
                        if (token.IsCancellationRequested)
                        {
                            return false;
                        }

                        var c = buff[current];

                        // A line break or a delimiter starts a new column
                        if (c == '\r' || c == '\n' || c == delimiter)
                        {
                            isStartOfColumn = true;
                            continue;
                        }

                        // Inside a column nothing needs to be checked
                        if (!isStartOfColumn)
                        {
                            continue;
                        }

                        // At the start of a column a qualifier is a real qualifier, we can stop
                        if (c == qualifier)
                        {
                            return true;
                        }

                        // Only white space keeps us at the start of the column: space and tab for
                        // characters up to U+00FF, space separators for everything else
                        if (c <= '\x00ff')
                        {
                            isStartOfColumn = c == ' ' || c == '\t';
                        }
                        else
                        {
                            isStartOfColumn = CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator;
                        }
                    }
                }
            }

            return false;
        }