// TODO: Check exception handling. The original Scala exception handling might be faulty:
// it throws exceptions while parsing, but disregards any CSV errors when doing the
// return checks at the end.
/// <summary>
/// Parses one CSV line from <c>_buffer</c>, starting at <c>_position</c>, and advances
/// <c>_position</c> / <c>_fieldStart</c> past everything that was consumed.
/// </summary>
/// <param name="requireLineEnd">
/// When <c>true</c>, columns are only returned if a line terminator (LF, CR or CR LF) was
/// reached; otherwise <c>null</c> is returned. When <c>false</c>, the end of the buffer
/// also terminates the line.
/// </param>
/// <returns>
/// The columns of the parsed line, or <c>null</c> when no complete line is available
/// (no line end while <paramref name="requireLineEnd"/> is set, or the buffer ends
/// inside a quoted field).
/// </returns>
/// <exception cref="MalformedCsvException">
/// An escape character is followed by nothing or by a character that may not be escaped,
/// or a closing quote is followed by something other than a delimiter or line end.
/// </exception>
/// <exception cref="UnsupportedCharsetException">
/// The first data starts with a UTF-16 or UTF-32 byte order mark.
/// </exception>
private List<ByteString> ParseLine(bool requireLineEnd)
{
    var buffer = _buffer;
    var columns = new List<ByteString>();
    var state = State.LineStart;
    var fieldBuilder = new FieldBuilder(buffer, this);

    void WrongCharEscaped()
    {
        throw new MalformedCsvException($"wrong escaping at {_currentLineNo}:{_position}, only escape or delimiter may be escaped");
    }

    void WrongCharEscapedWithinQuotes()
    {
        throw new MalformedCsvException($"wrong escaping at {_currentLineNo}:{_position}, only escape or quote may be escaped within quotes");
    }

    void NoCharEscaped()
    {
        throw new MalformedCsvException($"wrong escaping at {_currentLineNo}:{_position}, no character after escape");
    }

    // Skips a line feed at the current position so that CR LF counts as one line end.
    void ReadPastLf()
    {
        if (_position < buffer.Count && buffer[_position] == Lf)
        {
            _position++;
        }
    }

    // Skips a leading UTF-8 byte order mark (3 bytes) and rejects UTF-16 / UTF-32 marks.
    void CheckForByteOrderMark()
    {
        if (buffer.Count < 2)
        {
            return;
        }

        if (buffer.StartsWith(ByteOrderMark.UTF8))
        {
            _position = 3;
            _fieldStart = 3;
            return;
        }

        if (buffer.StartsWith(ByteOrderMark.UTF16_LE))
        {
            throw new UnsupportedCharsetException("UTF-16 LE and UTF-32 LE");
        }

        if (buffer.StartsWith(ByteOrderMark.UTF16_BE))
        {
            throw new UnsupportedCharsetException("UTF-16 BE");
        }

        if (buffer.StartsWith(ByteOrderMark.UTF32_BE))
        {
            throw new UnsupportedCharsetException("UTF-32 BE");
        }
    }

    // Stores a finished column and steps over the delimiter at the current position.
    void EndField(ByteString field)
    {
        columns.Add(field);
        state = State.AfterDelimiter;
        _position++;
        _fieldStart = _position;
    }

    // Stores the last column of the line and steps over the line terminator.
    void EndLine(ByteString field, bool startedWithCr)
    {
        columns.Add(field);
        state = State.LineEnd;
        _position++;
        if (startedWithCr)
        {
            ReadPastLf();
        }

        _fieldStart = _position;
    }

    // Consumes "escape + escaped byte" outside of quotes; only escape or delimiter may follow.
    void ReadEscaped()
    {
        if (_position + 1 >= buffer.Count)
        {
            NoCharEscaped();
        }

        var escaped = buffer[_position + 1];
        if (escaped != _escapeChar && escaped != _delimiter)
        {
            WrongCharEscaped();
        }

        fieldBuilder.Init(escaped);
        state = State.WithinField;
        _position += 2;
    }

    // Consumes "escape + escaped byte" inside quotes; only escape or quote may follow.
    void ReadEscapedWithinQuotes()
    {
        if (_position + 1 >= buffer.Count)
        {
            NoCharEscaped();
        }

        var escaped = buffer[_position + 1];
        if (escaped != _escapeChar && escaped != _quoteChar)
        {
            WrongCharEscapedWithinQuotes();
        }

        fieldBuilder.Init(escaped);
        state = State.WithinQuotedField;
        _position += 2;
    }

    if (_firstData)
    {
        CheckForByteOrderMark();
        _firstData = false;
    }

    while (state != State.LineEnd && _position < buffer.Count)
    {
        var b = buffer[_position];
        switch (state)
        {
            // The first field of a line is parsed exactly like any later field. In
            // particular an escape sequence at the very start of a line is honoured
            // instead of being taken as literal data.
            case State.LineStart:
            case State.AfterDelimiter:
                if (b == _quoteChar)
                {
                    state = State.QuoteStarted;
                    _position++;
                    _fieldStart = _position;
                }
                else if (b == _escapeChar)
                {
                    ReadEscaped();
                }
                else if (b == _delimiter)
                {
                    EndField(ByteString.Empty);
                }
                else if (b == Lf)
                {
                    EndLine(ByteString.Empty, false);
                }
                else if (b == Cr)
                {
                    EndLine(ByteString.Empty, true);
                }
                else
                {
                    fieldBuilder.Add(b);
                    state = State.WithinField;
                    _position++;
                }

                break;

            case State.WithinField:
                if (b == _escapeChar)
                {
                    ReadEscaped();
                }
                else if (b == _delimiter)
                {
                    EndField(fieldBuilder.Result(_position));
                }
                else if (b == Lf)
                {
                    EndLine(fieldBuilder.Result(_position), false);
                }
                else if (b == Cr)
                {
                    EndLine(fieldBuilder.Result(_position), true);
                }
                else
                {
                    fieldBuilder.Add(b);
                    _position++;
                }

                break;

            case State.QuoteStarted:
            case State.WithinQuotedField:
                // When escape and quote are the same byte, only the doubled-quote rule applies.
                if (b == _escapeChar && _escapeChar != _quoteChar)
                {
                    ReadEscapedWithinQuotes();
                }
                else if (b == _quoteChar)
                {
                    if (_position + 1 < buffer.Count && buffer[_position + 1] == _quoteChar)
                    {
                        // Doubled quote: one literal quote character inside the field.
                        fieldBuilder.Init(b);
                        state = State.WithinQuotedField;
                        _position += 2;
                    }
                    else
                    {
                        state = State.QuoteEnd;
                        _position++;
                    }
                }
                else
                {
                    fieldBuilder.Add(b);
                    state = State.WithinQuotedField;
                    _position++;
                }

                break;

            case State.QuoteEnd:
                // _position is one past the closing quote, so the field ends at _position - 1.
                if (b == _delimiter)
                {
                    EndField(fieldBuilder.Result(_position - 1));
                }
                else if (b == Lf)
                {
                    EndLine(fieldBuilder.Result(_position - 1), false);
                }
                else if (b == Cr)
                {
                    EndLine(fieldBuilder.Result(_position - 1), true);
                }
                else
                {
                    throw new MalformedCsvException($"Expected delimiter or end of line at {_currentLineNo}:{_position}");
                }

                break;
        }
    }

    if (requireLineEnd)
    {
        return state == State.LineEnd ? columns : null;
    }

    // The buffer ran out without a line terminator: finish whatever field was in progress.
    switch (state)
    {
        case State.AfterDelimiter:
            columns.Add(ByteString.Empty);
            return columns;

        case State.WithinQuotedField:
            // Unterminated quoted field: no line can be produced.
            return null;

        case State.WithinField:
            columns.Add(fieldBuilder.Result(_position));
            return columns;

        case State.QuoteEnd:
            columns.Add(fieldBuilder.Result(_position - 1));
            return columns;

        default:
            return columns;
    }
}