Beispiel #1
0
        /// <summary>
        /// Creates a CSV reader that pulls its text from a stream.
        ///
        /// The stream is wrapped in a <see cref="StreamReader"/> using the encoding from the settings.
        /// When the settings say a header row is present, it is read here (optionally preceded by a
        /// "sep=" line that overrides the delimiter); otherwise the assumed headers from the settings
        /// are used.
        /// </summary>
        /// <param name="source">The stream source. Note that when disposed, the CSV Reader will dispose the stream reader.</param>
        /// <param name="settings">The CSV settings to use for this reader (Default: CSV)</param>
        public CSVReader(Stream source, CSVSettings settings = null)
        {
            // Fall back to the stock CSV settings when the caller supplied none
            _settings = settings ?? CSVSettings.CSV;
            _stream = new StreamReader(source, _settings.Encoding);

            // No header row in the data: take the headers straight from the settings
            if (!_settings.HeaderRowIncluded)
            {
                Headers = _settings.AssumedHeaders;
                return;
            }

            var headerText = _stream.ReadLine();
            if (_settings.AllowSepLine)
            {
                var overrideDelimiter = CSV.ParseSepLine(headerText);
                if (overrideDelimiter != null)
                {
                    // Work on a copy: the settings passed in may be a shared singleton
                    _settings = _settings.CloneWithNewDelimiter(overrideDelimiter.Value);

                    // The line just read was the "sep=" directive, so the headers are on the next one
                    headerText = _stream.ReadLine();
                }
            }

            Headers = CSV.ParseLine(headerText, _settings);
        }
Beispiel #2
0
        /// <summary>
        /// Parse a new chunk of text retrieved via some other means than a stream.
        ///
        /// Call this function when you are retrieving your own text and when each chunk may or may not
        /// include line separators, and your stream does not consume line separators on its own.
        ///
        /// At most one row is returned per call. Text following a completed row stays buffered and is
        /// parsed by the next call, so keep calling (with an empty chunk if there is nothing new to add)
        /// until the state becomes Done.
        /// </summary>
        /// <param name="chunk">The new data to process; may be empty when there is nothing new to add</param>
        /// <param name="reachedEnd">Set this value to true when no further text will be supplied after this chunk</param>
        /// <returns>If this parsing operation produces a valid row, this will be non-null</returns>
        public string[] ParseChunk(string chunk, bool reachedEnd)
        {
            // Detect end of stream. After a row is returned the position is reset to -1 but the rest of
            // the buffer is kept, so the buffer must be empty too; otherwise buffered rows would be lost.
            if (reachedEnd && string.IsNullOrEmpty(chunk) && _position == -1 && string.IsNullOrEmpty(_line))
            {
                State = CSVState.Done;
                return null;
            }

            // Add this chunk to the current processing logic
            _line += chunk;

            // Check for the presence of a "sep=" line once at the beginning of a stream
            if (_allowSepLine)
            {
                var newDelimiter = CSV.ParseSepLine(_line);
                _allowSepLine = false;
                if (newDelimiter != null)
                {
                    _delimiter = newDelimiter.Value;

                    // The directive has been consumed; drop it so it is not tokenised again as a data row.
                    // NOTE(review): assumes ParseSepLine only succeeds when the buffer holds nothing but
                    // the "sep=" directive - confirm against CSV.ParseSepLine.
                    _line = string.Empty;
                    return null;
                }
            }

            // Process one character at a time from the current line
            while (_position < _line.Length || !reachedEnd)
            {
                _position++;

                // Have we run out of buffered text?
                if (_position >= _line.Length)
                {
                    if (reachedEnd)
                    {
                        // If we reached the end while still in a text qualifier, the CSV is broken
                        if (_inTextQualifier)
                        {
                            State = CSVState.MissingTrailingQualifier;
                            return null;
                        }

                        // We always add the final work item here because trailing empty strings are valid
                        State = CSVState.Done;
                        _list.Add(_work.ToString());
                        _line = string.Empty;
                        _position = -1;
                        return _list.ToArray();
                    }

                    // More text is still to come. Step back onto the last consumed character so that the
                    // increment at the top of the loop lands on the first character of the next chunk
                    // instead of skipping over it.
                    _position = _line.Length - 1;
                    return null;
                }
                var c = _line[_position];

                // Are we currently processing a text block (which may optionally span multiple lines)?
                // A qualifier only opens a text block when it is the first character of the field.
                if (_inTextQualifier || (c == _settings.TextQualifier && _work.Length == 0))
                {
                    // When resuming a block carried over from an earlier chunk, the current character
                    // is part of the field; when opening a block, it is the qualifier and is dropped.
                    if (_inTextQualifier)
                    {
                        _work.Append(c);
                    }
                    _inTextQualifier = true;

                    // Our next task is to find the end of this qualified-text field
                    var p2 = -1;
                    while (p2 < 0)
                    {
                        // If we don't see an end in sight, read more from the stream
                        p2 = _line.IndexOf(_settings.TextQualifier, _position + 1);
                        if (p2 < 0)
                        {
                            // No closing qualifier yet: keep what we have and wait for the next chunk
                            _work.Append(_line.Substring(_position + 1));
                            _line = string.Empty;
                            _position = -1;
                            if (reachedEnd)
                            {
                                State = CSVState.MissingTrailingQualifier;
                            }
                            return null;
                        }

                        // Append the text between the qualifiers
                        _work.Append(_line.Substring(_position + 1, p2 - _position - 1));
                        _position = p2;

                        // If the user put in a doubled-up qualifier, e.g. `""`, insert a single one and continue
                        if (p2 + 1 < _line.Length && _line[p2 + 1] == _settings.TextQualifier)
                        {
                            _work.Append(_settings.TextQualifier);
                            _position++;
                            p2 = -1;
                        }
                    }

                    // We're done parsing this text qualifier
                    _inTextQualifier = false;
                }
                // Are we at a line separator? Let's do a quick test on the first character before comparing the rest
                else if (c == _settings.LineSeparator[0] && _position + _settings.LineSeparator.Length <= _line.Length)
                {
                    if (string.Equals(_line.Substring(_position, _settings.LineSeparator.Length),
                                      _settings.LineSeparator))
                    {
                        // Keep everything after the separator buffered for the next call and emit the row
                        _line = _line.Substring(_position + _settings.LineSeparator.Length);
                        _position = -1;
                        _list.Add(_work.ToString());
                        var row = _list.ToArray();
                        _list.Clear();
                        _work.Length = 0;
                        return row;
                    }
                }
                // Does this start a new field?
                else if (c == _delimiter)
                {
                    // Is this a null token, and do we permit null tokens?
                    var s = _work.ToString();
                    if (_settings.AllowNull && string.Equals(s, _settings.NullToken, StringComparison.Ordinal))
                    {
                        _list.Add(null);
                    }
                    else
                    {
                        _list.Add(s);
                    }
                    _work.Length = 0;

                    // Special case: when the user has written a casual comma, space, and text qualifier,
                    // skip the space, e.g. `"bob", "mary", "bill"`.
                    // The bounds check guarantees both look-ahead characters exist.
                    if (_position + 2 <= _line.Length - 1)
                    {
                        if (_line[_position + 1] == ' ' && _line[_position + 2] == _settings.TextQualifier)
                        {
                            _position++;
                        }
                    }
                }
                // Regular character
                else
                {
                    _work.Append(c);
                }
            }

            State = CSVState.Done;
            return null;
        }