/// <summary>
/// Reads four-column CSV data whose record at index 2 holds only three values and checks what the
/// reader returns for the fields at index 2 and 3 of that record under <paramref name="action"/>.
/// </summary>
/// <param name="action">Missing-field behaviour assigned to the reader before reading.</param>
/// <remarks>
/// A <c>MissingFieldCsvException</c> reporting record index 2 at position 0 is rethrown to the caller;
/// any other <c>MissingFieldCsvException</c> is swallowed.
/// </remarks>
private void MissingFieldUnquotedTest2(MissingFieldAction action)
{
    const string Data = "a,b,c,d\n1,1,1,1\n2,2,2\n3,3,3,3";

    try
    {
        // With bufferSize = 7, the '\n' that ends the short record sits at offset 21 (3 * 7),
        // so the faulty new line char is at the start of the next buffer read.
        using (CsvReader csv = new CsvReader(new StringReader(Data), false, 7))
        {
            csv.MissingFieldAction = action;

            while (csv.ReadNextRecord())
            {
                for (int i = 0; i < csv.FieldCount; i++)
                {
                    // Every field is read, even when it is not asserted on, so that a
                    // missing field can raise its exception.
                    string s = csv[i];

                    if (csv.CurrentRecordIndex == 2)
                    {
                        // NOTE(review): Data has the value "2" at field index 2 of this record, yet the
                        // ReturnEmptyValue / ReturnNullValue cases below also run for i == 2 — confirm
                        // that "i > 1" is the intended lower bound.
                        if (i > 1)
                        {
                            switch (action)
                            {
                                case MissingFieldAction.ReturnEmptyValue:
                                    Assert.AreEqual(string.Empty, s);
                                    break;

                                case MissingFieldAction.ReturnNullValue:
                                    Assert.IsNull(s);
                                    break;

                                case MissingFieldAction.ReturnPartiallyParsedValue:
                                    if (i == 2)
                                    {
                                        Assert.AreEqual("2", s);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(string.Empty, s);
                                    }
                                    break;

                                default:
                                    Assert.Fail(string.Format("'{0}' is not handled by this test.", action));
                                    break;
                            }
                        }
                    }
                }
            }
        }
    }
    catch (MissingFieldCsvException ex)
    {
        if (ex.CurrentRecordIndex == 2 && ex.CurrentPosition == 0)
        {
            // Rethrow with "throw;" so the original stack trace is preserved.
            throw;
        }
    }
}
/// <summary>
/// Parses <paramref name="data"/> as CSV with the given options, advances to the first record and
/// returns that record's field values.
/// </summary>
/// <param name="data">The CSV text to parse.</param>
/// <param name="quote">The quote character. Default '"'</param>
/// <param name="delimiter">Field delimiter. Default ','</param>
/// <param name="escape">Quote escape character. Default '"'</param>
/// <param name="comment">Comment marker. Default '#'</param>
/// <param name="hasHeaders">Is the first line a header line (default false)?</param>
/// <param name="trimmingOptions">How should fields be trimmed? Default <c>None</c></param>
/// <param name="missingFieldAction">What should happen when a field is missing from a line? Default <c>ParseError</c></param>
/// <param name="quoteInsideQuotedFieldAction">What should happen when a quote is found inside a quoted field? Default <c>Ignore</c></param>
/// <param name="skipEmptyLines">Should empty lines be skipped (default false)?</param>
/// <returns>An array with one entry per field reported by the reader after the first read.</returns>
private static string[] Read(
    string data,
    char quote = '"',
    char delimiter = ',',
    char escape = '"',
    char comment = '#',
    bool hasHeaders = false,
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.None,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    QuotesInsideQuotedFieldAction quoteInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore,
    bool skipEmptyLines = false)
{
    // ReadCsv.FromString takes skipEmptyLines before the quote-inside-quoted-field action,
    // which is the reverse of this helper's own parameter order.
    // The reader is disposed as soon as the first record has been copied out.
    using (var reader = ReadCsv.FromString(
        data,
        quote,
        delimiter,
        escape,
        comment,
        hasHeaders,
        trimmingOptions,
        missingFieldAction,
        skipEmptyLines,
        quoteInsideQuotedFieldAction))
    {
        // The result of Read() is not checked; FieldCount and GetValues are used as-is.
        reader.Read();

        string[] results = new string[reader.FieldCount];
        reader.GetValues(results);
        return results;
    }
}
/// <summary>
/// Read a string as CSV, using specific behaviour, layout and conversion options, and expose the
/// records as a sequence of <typeparamref name="T"/>. The CSV schema is built from <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type the schema is built from and the element type of the returned sequence.</typeparam>
/// <param name="input">The CSV input</param>
/// <param name="quote">The quote character. Default '"'</param>
/// <param name="delimiter">Field delimiter. Default ','</param>
/// <param name="escape">Quote escape character (for quotes inside fields). Default '"'</param>
/// <param name="comment">Comment marker. Default '#'</param>
/// <param name="hasHeaders">Is the first line a header line (default false)?</param>
/// <param name="trimmingOptions">How should fields be trimmed?</param>
/// <param name="missingFieldAction">What should happen when a field is missing from a line?</param>
/// <param name="skipEmptyLines">Should empty lines be skipped?</param>
/// <param name="quotesInsideQuotedFieldAction">What should happen when a quote is found inside a quoted field?</param>
/// <param name="cultureInfo">Culture info to be used for parsing culture-sensitive data (such as date/time and decimal numbers)</param>
/// <returns>The result of calling <c>AsEnumerable&lt;T&gt;()</c> on the data reader created for <paramref name="input"/>.</returns>
public static IEnumerable<T> FromString<T>(
    string input,
    char quote = '"',
    char delimiter = ',',
    char escape = '"',
    char comment = '#',
    bool hasHeaders = false,
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.UnquotedOnly,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore,
    CultureInfo cultureInfo = null)
{
    // The same culture is given to the schema builder and to the reader.
    var schema = new CsvSchemaBuilder(cultureInfo).From<T>().Schema;

    return FromString(
        input,
        quote,
        delimiter,
        escape,
        comment,
        hasHeaders,
        trimmingOptions,
        missingFieldAction,
        skipEmptyLines,
        quotesInsideQuotedFieldAction,
        schema,
        cultureInfo)
        .AsEnumerable<T>();
}
/// <summary>
/// Builds unquoted CSV text in which one record is cut short, then reads it back and verifies every
/// field of every record through <c>CheckMissingFieldValueUnquoted</c>.
/// </summary>
/// <param name="recordCount">Number of records to generate.</param>
/// <param name="fieldCount">Number of fields in a complete record.</param>
/// <param name="badRecordIndex">Index of the record that is written with fewer fields.</param>
/// <param name="badFieldIndex">Number of fields written for the bad record, i.e. the index of its first missing field.</param>
/// <param name="bufferSize">Buffer size handed to the reader; also used to derive the expected error position.</param>
/// <param name="sequentialAccess">When false, the missing field is checked directly before the fields are walked in order.</param>
/// <param name="action">Missing-field behaviour assigned to the reader.</param>
private void CheckMissingFieldUnquoted(long recordCount, int fieldCount, long badRecordIndex, int badFieldIndex, int bufferSize, bool sequentialAccess, MissingFieldAction action)
{
    // Generated text follows the template "00,01,02\n10,11,12\n....";
    // the builder is pre-sized for that template.
    long capacity = recordCount * (fieldCount * 2 + fieldCount - 1) + recordCount;
    Assert.IsTrue(capacity <= int.MaxValue);

    StringBuilder text = new StringBuilder((int) capacity);
    int expectedErrorPosition = 0;

    for (long record = 0; record < recordCount; record++)
    {
        bool isBadRecord = record == badRecordIndex;
        int fieldsToWrite = isBadRecord ? badFieldIndex : fieldCount;

        for (int field = 0; field < fieldsToWrite; field++)
        {
            text.Append(record).Append(field).Append(CsvReader.DefaultDelimiter);
        }

        // Drop the last character (the trailing delimiter) and terminate the record.
        text.Length--;
        text.Append('\n');

        if (isBadRecord)
        {
            expectedErrorPosition = text.Length % bufferSize;

            // when eof is true, buffer is cleared and position is reset to 0, so exception will have CurrentPosition = 0
            if (record == recordCount - 1)
            {
                expectedErrorPosition = 0;
            }
        }
    }

    // Read the generated text back and verify it.
    using (CsvReader csv = new CsvReader(new StringReader(text.ToString()), false, bufferSize))
    {
        csv.MissingFieldAction = action;
        Assert.AreEqual(fieldCount, csv.FieldCount);

        while (csv.ReadNextRecord())
        {
            Assert.AreEqual(fieldCount, csv.FieldCount);

            // Random access: go straight to the missing field first.
            if (!sequentialAccess)
            {
                CheckMissingFieldValueUnquoted(csv, badFieldIndex, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
            }

            for (int fieldIndex = 0; fieldIndex < csv.FieldCount; fieldIndex++)
            {
                CheckMissingFieldValueUnquoted(csv, fieldIndex, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
            }
        }
    }
}
/// <summary>
/// Creates the set of behaviour options that drive the csv parser.
/// </summary>
/// <param name="trimmingOptions">How field values are trimmed. Default <c>UnquotedOnly</c></param>
/// <param name="missingFieldAction">What happens when a line has a missing field. Default <c>ParseError</c></param>
/// <param name="skipEmptyLines">Whether empty lines are skipped. Default true</param>
/// <param name="quotesInsideQuotedFieldAction">What happens when a quote is found inside a quoted field. Default <c>Ignore</c></param>
public CsvBehaviour(
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.UnquotedOnly,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore)
{
    // Each option is stored in its backing field as given.
    this._trimmingOptions = trimmingOptions;
    this._missingFieldAction = missingFieldAction;
    this._skipEmptyLines = skipEmptyLines;
    this._quotesInsideQuotedFieldAction = quotesInsideQuotedFieldAction;
}
/// <summary>
/// Creates the set of behaviour options that drive the csv parser.
/// </summary>
/// <param name="trimmingOptions">How field values are trimmed. Default <c>UnquotedOnly</c></param>
/// <param name="missingFieldAction">What happens when a line has a missing field. Default <c>ParseError</c></param>
/// <param name="skipEmptyLines">Whether empty lines are skipped. Default true</param>
/// <param name="quotesInsideQuotedFieldAction">What happens when a quote is found inside a quoted field. Default <c>Ignore</c></param>
public CsvBehaviour(
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.UnquotedOnly,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore)
{
    // Each option is assigned to the property of the same name, in declaration order.
    this.TrimmingOptions = trimmingOptions;
    this.MissingFieldAction = missingFieldAction;
    this.SkipEmptyLines = skipEmptyLines;
    this.QuotesInsideQuotedFieldAction = quotesInsideQuotedFieldAction;
}
/// <summary>
/// Read a string as CSV, using specific behaviour, layout and conversion options.
/// </summary>
/// <param name="input">The CSV input</param>
/// <param name="quote">The quote character. Default '"'</param>
/// <param name="delimiter">Field delimiter. Default ','</param>
/// <param name="escape">Quote escape character (for quotes inside fields). Default '"'</param>
/// <param name="comment">Comment marker. Default '#'</param>
/// <param name="hasHeaders">Is the first line a header line (default false)?</param>
/// <param name="trimmingOptions">How should fields be trimmed?</param>
/// <param name="missingFieldAction">What should happen when a field is missing from a line?</param>
/// <param name="skipEmptyLines">Should empty lines be skipped?</param>
/// <param name="quotesInsideQuotedFieldAction">What should happen when a quote is found inside a quoted field?</param>
/// <param name="converter">Converter class for converting strings to primitive types (used by the data reader).
/// When null, <c>Converter.Default</c> is used.</param>
/// <param name="bufferSize">The number of characters to buffer while parsing the CSV.</param>
/// <returns>A data reader instance to read the contents of the CSV input</returns>
public static IDataReader FromString(
    string input,
    char quote = '"',
    char delimiter = ',',
    char escape = '"',
    char comment = '#',
    bool hasHeaders = false,
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.UnquotedOnly,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore,
    IConverter converter = null,
    int bufferSize = 4096)
{
    StringReader textReader = new StringReader(input);

    // Group the loose parameters into the layout and behaviour objects FromReader expects.
    CsvLayout csvLayout = new CsvLayout(quote, delimiter, escape, comment, hasHeaders);
    CsvBehaviour csvBehaviour = new CsvBehaviour(
        trimmingOptions,
        missingFieldAction,
        skipEmptyLines,
        quotesInsideQuotedFieldAction);

    // Fall back to the default converter when the caller supplied none.
    IConverter effectiveConverter = converter ?? Converter.Default;

    return FromReader(textReader, csvLayout, csvBehaviour, effectiveConverter, bufferSize);
}
/// <summary>
/// Read a string as CSV, using specific behaviour, layout and conversion options.
/// </summary>
/// <param name="input">The CSV input</param>
/// <param name="quote">The quote character. Default '"'</param>
/// <param name="delimiter">Field delimiter. Default ','</param>
/// <param name="escape">Quote escape character (for quotes inside fields). Default '"'</param>
/// <param name="comment">Comment marker. Default '#'</param>
/// <param name="hasHeaders">Is the first line a header line (default false)?</param>
/// <param name="trimmingOptions">How should fields be trimmed? Default <c>None</c></param>
/// <param name="missingFieldAction">What should happen when a field is missing from a line?</param>
/// <param name="skipEmptyLines">Should empty lines be skipped?</param>
/// <param name="quotesInsideQuotedFieldAction">What should happen when a quote is found inside a quoted field?</param>
/// <param name="schema">The CSV schema; it is passed to the layout.</param>
/// <param name="cultureInfo">Culture info to be used for parsing culture-sensitive data (such as date/time and decimal numbers)</param>
/// <returns>A data reader instance to read the contents of the CSV input</returns>
public static IDataReader FromString(
    string input,
    char quote = '"',
    char delimiter = ',',
    char escape = '"',
    char comment = '#',
    bool hasHeaders = false,
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.None,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore,
    CsvSchema schema = null,
    CultureInfo cultureInfo = null)
{
    StringReader textReader = new StringReader(input);

    // The schema travels with the layout; the culture is handed to FromReader separately.
    CsvLayout csvLayout = new CsvLayout(quote, delimiter, escape, comment, hasHeaders, schema);
    CsvBehaviour csvBehaviour = new CsvBehaviour(
        trimmingOptions,
        missingFieldAction,
        skipEmptyLines,
        quotesInsideQuotedFieldAction);

    return FromReader(textReader, csvLayout, csvBehaviour, cultureInfo);
}
/// <summary>
/// Read a file as CSV, using specific behaviour, layout and conversion options. Make sure to dispose the datareader.
/// </summary>
/// <param name="path">The full or relative path name</param>
/// <param name="encoding">The encoding of the file. Default is UTF8.</param>
/// <param name="quote">The quote character. Default '"'</param>
/// <param name="delimiter">Field delimiter. Default ','</param>
/// <param name="escape">Quote escape character (for quotes inside fields). Default '"'</param>
/// <param name="comment">Comment marker. Default '#'</param>
/// <param name="hasHeaders">Is the first line a header line (default false)?</param>
/// <param name="trimmingOptions">How should fields be trimmed?</param>
/// <param name="missingFieldAction">What should happen when a field is missing from a line?</param>
/// <param name="skipEmptyLines">Should empty lines be skipped?</param>
/// <param name="quotesInsideQuotedFieldAction">What should happen when a quote is found inside a quoted field?</param>
/// <param name="converter">Converter class for converting strings to primitive types (used by the data reader).
/// When none is specified, <c>Converter.Default</c> is used.</param>
/// <param name="bufferSize">The number of characters to buffer while parsing the CSV.</param>
/// <returns>a datareader instance to read the contents of the CSV file</returns>
public static IDataReader FromFile(
    string path,
    Encoding encoding = null,
    char quote = '"',
    char delimiter = ',',
    char escape = '"',
    char comment = '#',
    bool hasHeaders = false,
    ValueTrimmingOptions trimmingOptions = ValueTrimmingOptions.UnquotedOnly,
    MissingFieldAction missingFieldAction = MissingFieldAction.ParseError,
    bool skipEmptyLines = true,
    QuotesInsideQuotedFieldAction quotesInsideQuotedFieldAction = QuotesInsideQuotedFieldAction.Ignore,
    IConverter converter = null,
    int bufferSize = 4096)
{
    // Layout and behaviour are built before the file is opened, so a failure here cannot leak a handle.
    var layout = new CsvLayout(quote, delimiter, escape, comment, hasHeaders);
    var behaviour = new CsvBehaviour(trimmingOptions, missingFieldAction, skipEmptyLines, quotesInsideQuotedFieldAction);

    var stream = File.OpenRead(path);
    try
    {
        var reader = new StreamReader(stream, encoding ?? Encoding.UTF8);

        // caller should dispose IDataReader, which will indirectly also close the stream
        return FromReader(reader, layout, behaviour, converter ?? Converter.Default, bufferSize);
    }
    catch
    {
        // No data reader reached the caller, so nobody else can close the file: release it here.
        stream.Dispose();
        throw;
    }
}
/// <summary>
/// Generates unquoted CSV text containing one short record, parses it with a reader configured with
/// <paramref name="action"/>, and checks each field via <c>CheckMissingFieldValueUnquoted</c>.
/// </summary>
/// <param name="recordCount">How many records are generated.</param>
/// <param name="fieldCount">How many fields a complete record has.</param>
/// <param name="badRecordIndex">Which record is written short.</param>
/// <param name="badFieldIndex">How many fields the short record gets; equals the index of its first missing field.</param>
/// <param name="bufferSize">Reader buffer size, also the modulus for the expected error position.</param>
/// <param name="sequentialAccess">If false, the missing field is probed directly before the in-order pass.</param>
/// <param name="action">Missing-field behaviour set on the reader.</param>
private void CheckMissingFieldUnquoted(long recordCount, int fieldCount, long badRecordIndex, int badFieldIndex, int bufferSize, bool sequentialAccess, MissingFieldAction action)
{
    // construct the csv data with template "00,01,02\n10,11,12\n...." and calculate expected error position
    long capacity = recordCount * (fieldCount * 2 + fieldCount - 1) + recordCount;
    Assert.IsTrue(capacity <= int.MaxValue);

    StringBuilder builder = new StringBuilder((int)capacity);
    int expectedErrorPosition = 0;
    long lastRecordIndex = recordCount - 1;

    for (long row = 0; row < recordCount; row++)
    {
        int columnsInRow = fieldCount;
        if (row == badRecordIndex)
        {
            columnsInRow = badFieldIndex;
        }

        int column = 0;
        while (column < columnsInRow)
        {
            builder.Append(row);
            builder.Append(column);
            builder.Append(CsvReader.DefaultDelimiter);
            column++;
        }

        // Replace the final character (the trailing delimiter) with the record terminator.
        builder.Length--;
        builder.Append('\n');

        if (row != badRecordIndex)
        {
            continue;
        }

        expectedErrorPosition = builder.Length % bufferSize;

        // when eof is true, buffer is cleared and position is reset to 0, so exception will have CurrentPosition = 0
        if (row == lastRecordIndex)
        {
            expectedErrorPosition = 0;
        }
    }

    // test csv
    string csvText = builder.ToString();
    using (CsvReader csv = new CsvReader(new StringReader(csvText), false, bufferSize))
    {
        csv.MissingFieldAction = action;
        Assert.AreEqual(fieldCount, csv.FieldCount);

        while (csv.ReadNextRecord())
        {
            Assert.AreEqual(fieldCount, csv.FieldCount);

            // if not sequential, directly test the missing field
            if (!sequentialAccess)
            {
                CheckMissingFieldValueUnquoted(csv, badFieldIndex, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
            }

            for (int index = 0; index < csv.FieldCount; index++)
            {
                CheckMissingFieldValueUnquoted(csv, index, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
            }
        }
    }
}
/// <summary>
/// Reads one field from the current record and verifies the outcome: either the expected value, the
/// replacement dictated by <paramref name="action"/>, or a <c>MissingFieldCsvException</c> whose
/// record index and position match the expectations.
/// </summary>
/// <param name="csv">Reader positioned on the record to check.</param>
/// <param name="fieldIndex">Index of the field to read.</param>
/// <param name="badRecordIndex">Index of the record that has missing fields.</param>
/// <param name="badFieldIndex">Index of the first missing field in the bad record.</param>
/// <param name="expectedErrorPosition">Position a <c>MissingFieldCsvException</c> is expected to report.</param>
/// <param name="sequentialAccess">Only used in assertion messages.</param>
/// <param name="action">Missing-field behaviour the reader was configured with.</param>
private void CheckMissingFieldValueUnquoted(CsvReader csv, int fieldIndex, long badRecordIndex, int badFieldIndex, int expectedErrorPosition, bool sequentialAccess, MissingFieldAction action)
{
    const string Format = "RecordIndex={0}; FieldIndex={1}; Position={2}; Sequential={3}; Action={4}";

    // Start from a sentinel so that a read which assigns nothing cannot pass by accident.
    string value = "asdfasdfasdf";

    try
    {
        value = csv[fieldIndex];
    }
    catch (MissingFieldCsvException ex)
    {
        // An exception is only acceptable for a missing field of the bad record, at the expected position.
        Assert.AreEqual(badRecordIndex, ex.CurrentRecordIndex, Format, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        Assert.IsTrue(fieldIndex >= badFieldIndex, Format, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        Assert.AreEqual(expectedErrorPosition, ex.CurrentPosition, Format, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        return;
    }

    bool isMissingField = csv.CurrentRecordIndex == badRecordIndex && fieldIndex >= badFieldIndex;
    if (!isMissingField)
    {
        // Present fields hold the record index followed by the field index.
        Assert.AreEqual(csv.CurrentRecordIndex.ToString() + fieldIndex.ToString(), value, Format, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
        return;
    }

    // A missing field was read without an exception: the value must match the configured replacement.
    switch (action)
    {
        case MissingFieldAction.ReplaceByEmpty:
            Assert.AreEqual(string.Empty, value, Format, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
            break;

        case MissingFieldAction.ReplaceByNull:
            Assert.IsNull(value, Format, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
            break;

        case MissingFieldAction.ParseError:
            Assert.Fail("Failed to throw ParseError. - " + Format, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
            break;

        default:
            Assert.Fail("'{0}' is not handled by this test.", action);
            break;
    }
}
/// <summary>
/// Fetches a single field of the current record and asserts on the result: the generated value for
/// a present field, the replacement selected by <paramref name="action"/> for a missing one, or a
/// <c>MissingFieldCsvException</c> carrying the expected record index and position.
/// </summary>
/// <param name="csv">Reader positioned on the record under test.</param>
/// <param name="fieldIndex">Field to fetch.</param>
/// <param name="badRecordIndex">Record that was generated with missing fields.</param>
/// <param name="badFieldIndex">First missing field of that record.</param>
/// <param name="expectedErrorPosition">Position expected on a thrown <c>MissingFieldCsvException</c>.</param>
/// <param name="sequentialAccess">Reported in assertion messages only.</param>
/// <param name="action">Missing-field behaviour configured on the reader.</param>
private void CheckMissingFieldValueUnquoted(CsvReader csv, int fieldIndex, long badRecordIndex, int badFieldIndex, int expectedErrorPosition, bool sequentialAccess, MissingFieldAction action)
{
    const string Message = "RecordIndex={0}; FieldIndex={1}; Position={2}; Sequential={3}; Action={4}";

    // make sure the variable contains garbage as to not have false successes
    string fieldValue = "asdfasdfasdf";

    try
    {
        fieldValue = csv[fieldIndex];
    }
    catch (MissingFieldCsvException ex)
    {
        Assert.AreEqual(badRecordIndex, ex.CurrentRecordIndex, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        Assert.IsTrue(fieldIndex >= badFieldIndex, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        Assert.AreEqual(expectedErrorPosition, ex.CurrentPosition, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
        return;
    }

    if (csv.CurrentRecordIndex == badRecordIndex && fieldIndex >= badFieldIndex)
    {
        // The field is one of the missing ones and no exception was raised.
        switch (action)
        {
            case MissingFieldAction.ReplaceByEmpty:
                Assert.AreEqual(string.Empty, fieldValue, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
                break;

            case MissingFieldAction.ReplaceByNull:
                Assert.IsNull(fieldValue, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
                break;

            case MissingFieldAction.ParseError:
                Assert.Fail("Failed to throw ParseError. - " + Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
                break;

            default:
                Assert.Fail("'{0}' is not handled by this test.", action);
                break;
        }
    }
    else
    {
        // Present fields were generated as record index immediately followed by field index.
        string expectedValue = string.Concat(csv.CurrentRecordIndex.ToString(), fieldIndex.ToString());
        Assert.AreEqual(expectedValue, fieldValue, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
    }
}