/// <summary>
/// Create a CSV reader that pulls its text from a stream.
/// </summary>
/// <param name="source">The stream to read from. It is wrapped in a stream reader owned by this CSV reader; when the CSV reader is disposed, that stream reader is disposed as well.</param>
/// <param name="settings">The CSV settings to use for this reader; when null, <c>CSVSettings.CSV</c> is used.</param>
public CSVReader(Stream source, CSVSettings settings = null)
{
    // Fall back to the standard CSV settings when the caller supplied none
    _settings = settings ?? CSVSettings.CSV;
    _stream = new StreamReader(source, _settings.Encoding);

    // Without a header row in the data, the headers come straight from the settings
    if (!_settings.HeaderRowIncluded)
    {
        Headers = _settings.AssumedHeaders;
        return;
    }

    // The first line is either the header row or, when permitted, a "sep=" line that precedes it
    var headerLine = _stream.ReadLine();
    if (_settings.AllowSepLine)
    {
        var sepDelimiter = CSV.ParseSepLine(headerLine);
        if (sepDelimiter != null)
        {
            // Work on a copy: the settings object we were handed may be a shared singleton
            _settings = _settings.CloneWithNewDelimiter(sepDelimiter.Value);

            // The sep line is not the header row; the header row is the line after it
            headerLine = _stream.ReadLine();
        }
    }

    Headers = CSV.ParseLine(headerLine, _settings);
}
/// <summary>
/// Parse a new chunk of text retrieved via some other means than a stream.
///
/// Call this function when you are retrieving your own text and when each chunk may or may not
/// include line separators, and your stream does not consume line separators on its own.
/// </summary>
/// <remarks>
/// The chunk is appended to the text buffered by earlier calls and parsing resumes where the
/// previous call stopped. At most one row is returned per call; any text after that row's line
/// separator stays buffered and is parsed by the next call.
///
/// State is set to CSVState.Done once the final row has been returned (or when the end is reached
/// with nothing left to parse), and to CSVState.MissingTrailingQualifier when the end is reached
/// while a text-qualified field is still open.
/// </remarks>
/// <param name="chunk">The new data to process. May be null or empty when there is nothing new to add.</param>
/// <param name="reachedEnd">Set this value to true when no further text will follow this chunk, so that
/// the last row is returned even though it is not followed by a line separator.</param>
/// <returns>The fields of the next completed row, or null if this call did not complete a row</returns>
public string[] ParseChunk(string chunk, bool reachedEnd)
{
    // A call that ran out of text returns with _position one past the last buffered character.
    // Step back so the pre-increment in the loop below lands on the first character of the new
    // chunk instead of skipping it.
    var bufferedLength = _line == null ? 0 : _line.Length;
    if (_position >= bufferedLength)
    {
        _position = bufferedLength - 1;
    }

    // Detect end of stream: nothing new, nothing buffered, and no qualified field left open.
    // Buffered text or an open qualifier must still go through the loop so that the remaining
    // rows are returned or the missing qualifier is reported.
    if (reachedEnd && string.IsNullOrEmpty(chunk) && _position == -1 && bufferedLength == 0 && !_inTextQualifier)
    {
        State = CSVState.Done;
        return null;
    }

    // Add this chunk to the current processing logic
    _line += chunk;

    // Check for the presence of a "sep=" line once at the beginning of a stream
    if (_allowSepLine)
    {
        var newDelimiter = CSV.ParseSepLine(_line);
        _allowSepLine = false;
        if (newDelimiter != null)
        {
            _delimiter = newDelimiter.Value;

            // The sep line is an instruction, not data: drop it so the next call does not hand it
            // back as a row. NOTE(review): assumes ParseSepLine only succeeds when the buffered
            // text is exactly the sep line - confirm against its implementation.
            _line = string.Empty;
            _position = -1;
            return null;
        }
    }

    var qualifier = _settings.TextQualifier;
    var separator = _settings.LineSeparator;

    // Process one character at a time from the current line
    while (_position < _line.Length || !reachedEnd)
    {
        _position++;

        // Have we run out of buffered text?
        if (_position >= _line.Length)
        {
            if (!reachedEnd)
            {
                // More text is expected; keep the partial row until the next call
                return null;
            }

            // If we reached the end while still in a text qualifier, the CSV is broken
            if (_inTextQualifier)
            {
                State = CSVState.MissingTrailingQualifier;
                return null;
            }

            // We always add the final work item here because trailing empty strings are valid
            State = CSVState.Done;
            _list.Add(_work.ToString());
            _line = string.Empty;
            _position = -1;
            return _list.ToArray();
        }

        var c = _line[_position];

        // Are we currently processing a text block (which may optionally span multiple chunks)?
        // A qualifier only opens a text block when it is the first character of the field.
        if (_inTextQualifier || (c == qualifier && _work.Length == 0))
        {
            // When opening a block, the text starts after the qualifier. When resuming a block left
            // open by an earlier call, the current character is already inside the block and may
            // itself be the closing (or a doubled) qualifier, so the search has to include it.
            var searchFrom = _inTextQualifier ? _position : _position + 1;
            _inTextQualifier = true;

            // Our next task is to find the end of this qualified-text field
            while (true)
            {
                var closing = _line.IndexOf(qualifier, searchFrom);
                if (closing < 0)
                {
                    // No closing qualifier yet: keep what we have and wait for more text
                    _work.Append(_line.Substring(searchFrom));
                    _line = string.Empty;
                    _position = -1;
                    if (reachedEnd)
                    {
                        State = CSVState.MissingTrailingQualifier;
                    }
                    return null;
                }

                // Append the text in front of the qualifier we found
                _work.Append(_line.Substring(searchFrom, closing - searchFrom));
                _position = closing;

                // If the user put in a doubled-up qualifier, e.g. `""`, insert a single one and continue.
                // NOTE(review): a doubled qualifier split exactly across two chunks is still read
                // as a closing qualifier, because the second half is not in the buffer yet.
                if (closing + 1 < _line.Length && _line[closing + 1] == qualifier)
                {
                    _work.Append(qualifier);
                    _position++;
                    searchFrom = _position + 1;
                    continue;
                }

                break;
            }

            // We're done parsing this text qualifier
            _inTextQualifier = false;
            continue;
        }

        // Are we at a line separator? Test the first character before comparing the whole thing
        if (c == separator[0])
        {
            var available = _line.Length - _position;
            if (available >= separator.Length)
            {
                if (string.CompareOrdinal(_line, _position, separator, 0, separator.Length) == 0)
                {
                    // End of row: keep whatever follows the separator for the next call
                    _line = _line.Substring(_position + separator.Length);
                    _position = -1;
                    _list.Add(_work.ToString());
                    var row = _list.ToArray();
                    _list.Clear();
                    _work.Length = 0;
                    return row;
                }
            }
            else if (!reachedEnd && string.CompareOrdinal(_line, _position, separator, 0, available) == 0)
            {
                // The buffer ends with the beginning of a line separator. Step back and wait for
                // the next chunk so this character is looked at again once the rest is available.
                _position--;
                return null;
            }

            // Not a line separator after all: fall through and treat it like any other character
        }

        // Does this start a new field?
        if (c == _delimiter)
        {
            // Is this a null token, and do we permit null tokens?
            var field = _work.ToString();
            if (_settings.AllowNull && string.Equals(field, _settings.NullToken, StringComparison.Ordinal))
            {
                _list.Add(null);
            }
            else
            {
                _list.Add(field);
            }
            _work.Length = 0;

            // Test for special case: when the user has written a casual comma, space, and text
            // qualifier, skip the space, e.g. `"bob", "mary", "bill"`.
            // The bounds check guarantees both look-ahead characters are in the buffer.
            if (_position + 2 <= _line.Length - 1
                && _line[_position + 1] == ' '
                && _line[_position + 2] == qualifier)
            {
                _position++;
            }
            continue;
        }

        // Regular character
        _work.Append(c);
    }

    // Defensive fallback for the case where the loop condition itself fails
    State = CSVState.Done;
    return null;
}