/// <summary>
///  Loads the sanitize spec from the CSV file at SpecFilePath, applying one command per row.
///  Cell 0 of each row is the command (matched case-insensitively); cell 1 is read as the
///  column name for every row, before the command is examined.
/// </summary>
/// <remarks>
///  Supported rows:
///   Sample,ColumnName,Probability          - sets SampleColumnName and SampleProbability
///   Echo,Value1,Value2,...                 - adds every cell after the command to EchoValues
///   Drop,ColumnName                        - adds the column to DropColumns
///   Map,ColumnName,MapperName              - registers a map handler for the column
///   Keep,ColumnName                        - no-op; keeping a column is the default
///   Regex,ColumnName,Expression,MapperName - registers a regex handler for the column
///
///  The sample probability is parsed with the invariant culture so that a spec file is read
///  the same way regardless of the machine's regional settings.
/// </remarks>
/// <exception cref="UsageException">
///  The sample probability is not within [0, 1] (NaN included), or the command is unknown.
/// </exception>
/// <exception cref="System.FormatException">The sample probability is not a number.</exception>
public void LoadSpec()
{
    // NOTE(review): the second constructor argument is 'false'; presumably "no header row" - confirm against CsvReader.
    using (ITabularReader r = new CsvReader(this.SpecFilePath, false))
    {
        while (r.NextRow())
        {
            string command = r.Current(0).ToString();
            string columnName = r.Current(1).ToString();

            switch (command.ToLowerInvariant())
            {
                case "sample":
                    // Sample,ColumnName,Probability
                    this.SampleColumnName = columnName;

                    // Spec files are machine-readable: always parse with '.' as the decimal separator.
                    this.SampleProbability = double.Parse(r.Current(2).ToString(), System.Globalization.CultureInfo.InvariantCulture);

                    // Written as a negated "in range" test so that NaN (for which every comparison is false) is rejected too.
                    if (!(this.SampleProbability >= 0.0 && this.SampleProbability <= 1.0))
                    {
                        throw new UsageException($"SanitizeSpec sample probability ({r.Current(2)}) is out of range. It must be between zero and one.");
                    }

                    break;

                case "echo":
                    // Echo,Value1,Value2,...
                    // Starts at cell 1: for this command the "column name" cell is the first echo value.
                    for (int i = 1; i < r.CurrentRowColumns; ++i)
                    {
                        this.EchoValues.Add(r.Current(i).ToString8());
                    }

                    break;

                case "drop":
                    // Drop,ColumnName
                    this.DropColumns.Add(columnName);
                    break;

                case "map":
                    // Map,ColumnName,MapperName
                    this.HandlersByColumn.Add(
                        columnName,
                        new EchoColumnHandler(
                            this.EchoValues,
                            new MapColumnHandler(this.HashKeyHash, this.Provider.Mapper(r.Current(2).ToString()))));
                    break;

                case "keep":
                    // Keep is the default behavior, so it is in the sanispec only as a comment
                    break;

                case "regex":
                    // Regex,ColumnName,Expression,MapperName
                    MapColumnHandler handler = new MapColumnHandler(this.HashKeyHash, this.Provider.Mapper(r.Current(3).ToString()));
                    this.HandlersByColumn.Add(
                        columnName,
                        new EchoColumnHandler(
                            this.EchoValues,
                            new RegexColumnHandler(r.Current(2).ToString(), handler)));
                    break;

                default:
                    throw new UsageException($"SanitizeSpec mode '{command}' is unknown. Supported modes: sample, echo, drop, map, keep, regex.");
            }
        }
    }
}
/// <summary>
///  Reads every row of the CSV content in <paramref name="input"/> and converts each one
///  to a Result via ParseResult.
/// </summary>
/// <param name="input">Stream containing the CSV data to parse.</param>
/// <returns>One Result per row, in the order the rows were read.</returns>
private List<Result> GetResultsFromStream(Stream input)
{
    List<Result> parsed = new List<Result>();

    // The reader is stored in the _parser field for the duration of the read and is
    // disposed when the using block exits.
    using (_parser = new CsvReader(input))
    {
        bool hasRow = _parser.NextRow();

        while (hasRow)
        {
            parsed.Add(ParseResult(_parser.Current()));
            hasRow = _parser.NextRow();
        }
    }

    return parsed;
}
/// <summary>
///  Exercises CsvReader end to end with a deliberately small buffer: empty input, rows with
///  and without trailing newlines, empty cells, mixed newline styles, buffer growth,
///  a "\r\n" pair split across a buffer boundary, quoted values, and a malformed quote.
/// </summary>
public void CsvReader_Basics()
{
    int smallBuffer = 64;

    // --- Empty input: there must be no rows, and disposing twice must be harmless ---
    using (var csv = new CsvReader(StreamFromString(""), smallBuffer))
    {
        Assert.False(csv.NextRow());

        // Explicit Dispose here, followed by the implicit one when the using block ends
        csv.Dispose();
    }

    // --- One row, no newline at the end ---
    string singleRow = "One,Two,Three";
    using (var csv = new CsvReader(StreamFromString(singleRow), smallBuffer))
    {
        Assert.Equal(0, csv.RowCountRead);

        Assert.True(csv.NextRow());
        Assert.Equal(1, csv.RowCountRead);
        Assert.Equal("One|Two|Three", string.Join("|", csv.Current()));

        // A failed NextRow must not bump the row count
        Assert.False(csv.NextRow());
        Assert.Equal(1, csv.RowCountRead);
    }

    // --- Empty cells at the start, middle and end of a row ---
    string rowWithEmptyCells = ",Value,,";
    using (var csv = new CsvReader(StreamFromString(rowWithEmptyCells), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal("|Value||", string.Join("|", csv.Current()));
        Assert.False(csv.NextRow());
    }

    // --- Mixed "\n" and "\r\n" terminators, plus a trailing newline ---
    string mixedNewlines = "One\nTwo\r\nThree\r\n";
    using (var csv = new CsvReader(StreamFromString(mixedNewlines), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal("One", string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Equal("Two", string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Equal("Three", string.Join("|", csv.Current()));

        Assert.False(csv.NextRow());
    }

    // --- A row longer than the buffer: every cell must still be returned ---
    string oneHundredColumns = string.Join(",", Enumerable.Range(100, 100).Select(i => i.ToString()));
    using (var csv = new CsvReader(StreamFromString(oneHundredColumns), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal(100, csv.Current().Count);
        Assert.False(csv.NextRow());
    }

    // --- A single value exactly twice the buffer size, needing two resizes to read ---
    string valueRequiringBufferExpand = new string('0', 128);
    string rowsThenLongValue = $"One,Two,Three\r\nSecond,Row\r\n{valueRequiringBufferExpand}";
    using (var csv = new CsvReader(StreamFromString(rowsThenLongValue), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal("One|Two|Three", string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Equal("Second|Row", string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Single(csv.Current());
        Assert.Equal(valueRequiringBufferExpand, csv.Current()[0]);
    }

    // --- '\r' lands exactly on the buffer boundary; the '\n' after the refill must be skipped ---
    string sixtyThreeZeros = new string('0', 63);
    string newlineSplitAcrossBuffers = $"{sixtyThreeZeros}\r\nNextRow\r\n";
    using (var csv = new CsvReader(StreamFromString(newlineSplitAcrossBuffers), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal(new string('0', 63), string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Equal("NextRow", string.Join("|", csv.Current()));

        Assert.False(csv.NextRow());
    }

    // --- Quoting: quoted empty, quotes at both ends, quote inside, plain value after quoted ones, back-to-back escaped quotes ---
    string quotedVariations = "\"\",\"\"\"Around\"\"\",\"With\"\"in\",None,\"Many\"\"\"\"\"\nNextRow";
    using (var csv = new CsvReader(StreamFromString(quotedVariations), smallBuffer))
    {
        Assert.True(csv.NextRow());
        Assert.Equal("|\"Around\"|With\"in|None|Many\"\"", string.Join("|", csv.Current()));

        Assert.True(csv.NextRow());
        Assert.Equal("NextRow", string.Join("|", csv.Current()));

        Assert.False(csv.NextRow());
    }

    // --- An unescaped quote that is not at the end of the cell must raise IOException ---
    string unescapedQuote = "\"Unescaped\"Quote,";
    using (var csv = new CsvReader(StreamFromString(unescapedQuote), smallBuffer))
    {
        Assert.Throws<IOException>(() => csv.NextRow());
    }
}