/// <summary>
/// Lazily splits a sequence of characters holding CSV text into rows, each row being a sequence of field values.
/// </summary>
/// <param name="stream">The characters of the CSV text.</param>
/// <param name="trim">When true, whitespace outside of quotes that surrounds a field is dropped instead of being kept in the value.</param>
/// <param name="skipHeader">When true and the input has at least one character, the first row is parsed and its values are discarded.</param>
/// <param name="columns">
/// Column count handed to the underlying <c>CharIteratorWithStats</c> (presumably its initial required field count - confirm against that type).
/// A negative value turns on count detection, so the first parsed row fixes the required field count for the rows that follow.
/// </param>
/// <param name="skipIfMore">When true, fields beyond the required count are silently dropped.</param>
/// <param name="addIfLess">When true, rows with fewer fields than the required count are padded with empty strings.</param>
/// <returns>One deferred field sequence per row of input.</returns>
/// <remarks>
/// Every returned row is a deferred iterator reading from the same character enumerator and the same two scratch buffers.
/// A row consumes input only while it is being enumerated, so each row should be enumerated to its end, once,
/// before the next row is requested.
/// </remarks>
public static IEnumerable<IEnumerable<string>> ParseCSV(this IEnumerable<char> stream, bool trim = false, bool skipHeader = false, int columns = -1, bool skipIfMore = false, bool addIfLess = false)
{
    var lookAhead = stream.AsLookAheadEnumerable().GetLookAheadEnumerator();
    var source = new CharIteratorWithStats(lookAhead, columns);

    // Scratch buffers shared by all rows: one for the field being built, one for whitespace not yet committed to it.
    var valueBuffer = new StringBuilder();
    var spaceBuffer = new StringBuilder();

    var detectCount = columns < 0;

    if (skipHeader && source.HasNext)
    {
        source.DetectCount = detectCount;

        // Drain the header row so its characters are consumed; the values themselves are not needed.
        using (var header = parseCSVRow(source, trim, skipIfMore, addIfLess, valueBuffer, spaceBuffer).GetEnumerator())
        {
            while (header.MoveNext())
            {
            }
        }
    }

    source.DetectCount = detectCount;

    while (source.HasNext)
    {
        yield return parseCSVRow(source, trim, skipIfMore, addIfLess, valueBuffer, spaceBuffer);
    }
}
/// <summary>
/// Reads one CSV row from <paramref name="enumerator"/> and yields its fields in order.
/// </summary>
/// <remarks>
/// The row ends at the first CR, LF or CRLF found outside of a quoted field, or at the end of input.
/// Inside a quoted field a doubled quote produces a single quote character, and commas and line breaks are kept as data.
/// Whitespace is held back in <paramref name="ws"/> until the following character decides whether it belongs to the value.
/// </remarks>
/// <param name="enumerator">Shared character source; its Count is reset here and its RequiredCount may be set when detection is on.</param>
/// <param name="trim">When true, whitespace outside of quotes that surrounds a field is dropped.</param>
/// <param name="skipIfMore">When true, fields beyond the required count are not yielded.</param>
/// <param name="addIfLess">When true, missing fields up to the required count are yielded as empty strings.</param>
/// <param name="sb">Scratch buffer for the field being built; must be empty on entry.</param>
/// <param name="ws">Scratch buffer for pending whitespace; must be empty on entry.</param>
/// <returns>The field values of the row, produced lazily.</returns>
/// <exception cref="CSVParserException">
/// Thrown when a scratch buffer is not empty on entry, when a closing quote is followed by a character other than
/// a comma, whitespace or a line break, when untrimmed whitespace follows a closing quote at the end of the row,
/// or when the number of fields differs from the required count.
/// </exception>
private static IEnumerable<string> parseCSVRow(CharIteratorWithStats enumerator, bool trim, bool skipIfMore, bool addIfLess, StringBuilder sb, StringBuilder ws)
{
    // Leftover buffer content (for instance from a row that was not enumerated to its end) is reported as an error.
    if (sb.Length != 0)
    {
        throw new CSVParserException("{0}.ParseCSVRow(!sb.empty)".Args(enumerator.GetType().Name), enumerator.Line, enumerator.Column);
    }

    if (ws.Length != 0)
    {
        throw new CSVParserException("{0}.ParseCSVRow(!ws.empty)".Args(enumerator.GetType().Name), enumerator.Line, enumerator.Column);
    }

    var mode = State.None;
    enumerator.Count = 0;

    while (enumerator.MoveNext())
    {
        var ch = enumerator.Current;
        var insideQuotes = mode == State.Quote;

        // A line break outside of quotes terminates the row.
        if (!insideQuotes && (ch == '\n' || ch == '\r'))
        {
            // Treat CRLF as a single terminator by consuming the LF as well.
            if (ch == '\r' && enumerator.HasNext && enumerator.Next == '\n')
            {
                enumerator.MoveNext();
            }

            break;
        }

        // Whitespace is deferred: whether it is kept depends on what follows it.
        if (char.IsWhiteSpace(ch))
        {
            ws.Append(ch);
            continue;
        }

        // Outside of quotes a comma always closes the current field, whatever the state was.
        if (!insideQuotes && ch == ',')
        {
            mode = State.None;

            if (!trim)
            {
                sb.Append(ws.ToString());
            }

            ws.Clear();

            if (!skipIfMore || enumerator.RequiredCount < 0 || enumerator.Count < enumerator.RequiredCount)
            {
                enumerator.Count++;
                yield return sb.ToString();
            }

            sb.Clear();
            continue;
        }

        switch (mode)
        {
            case State.None:
                // A quote opens a quoted field only at the very start of the field (or after leading whitespace when trimming).
                if (ch == '"' && (trim || ws.Length == 0))
                {
                    mode = State.Quote;
                    ws.Clear();
                }
                else
                {
                    mode = State.Value;

                    if (!trim)
                    {
                        sb.Append(ws.ToString());
                    }

                    ws.Clear();
                    sb.Append(ch);
                }

                break;

            case State.Value:
                // Whitespace between two non-blank characters is interior to the value and is always kept.
                sb.Append(ws.ToString());
                ws.Clear();
                sb.Append(ch);
                break;

            case State.Quote:
                // Everything inside quotes is data, including the whitespace collected so far.
                sb.Append(ws.ToString());
                ws.Clear();

                if (ch != '"')
                {
                    sb.Append(ch);
                }
                else if (enumerator.HasNext && enumerator.Next == '"')
                {
                    // Doubled quote: emit one quote character and skip the second.
                    sb.Append(ch);
                    enumerator.MoveNext();
                }
                else
                {
                    mode = State.AfterQuote;
                }

                break;

            case State.AfterQuote:
                // Only a separator, whitespace or a line break may follow a closing quote.
                throw new CSVParserException("{0}.parseCSVRow()".Args(enumerator.GetType().Name), enumerator.Line, enumerator.Column);
        }
    }

    // End of row: settle the whitespace that is still pending.
    if (!trim)
    {
        if (mode != State.AfterQuote)
        {
            sb.Append(ws.ToString());
        }
        else if (ws.Length != 0)
        {
            throw new CSVParserException("{0}.ParseCSVRow()".Args(enumerator.GetType().Name), enumerator.Line, enumerator.Column);
        }
    }

    ws.Clear();

    // Last field of the row.
    if (!skipIfMore || enumerator.RequiredCount < 0 || enumerator.Count < enumerator.RequiredCount)
    {
        enumerator.Count++;
        yield return sb.ToString();
    }

    // Pad short rows up to the required count.
    if (addIfLess && enumerator.RequiredCount >= 0)
    {
        while (enumerator.Count < enumerator.RequiredCount)
        {
            yield return string.Empty;
            enumerator.Count++;
        }
    }

    if (enumerator.RequiredCount >= 0 && enumerator.Count != enumerator.RequiredCount)
    {
        throw new CSVParserException("{0}.parseCSVRow(Count!=RequiredCount)({1}!={2})".Args(enumerator.GetType().Name, enumerator.Count, enumerator.RequiredCount), enumerator.Line, enumerator.Column);
    }

    // With detection on, the first completed row fixes the count required of later rows.
    if (enumerator.RequiredCount < 0 && enumerator.DetectCount)
    {
        enumerator.RequiredCount = enumerator.Count;
    }

    sb.Clear();
}